############################
## Weissman & Mikkelborg  ##
## Political Behavior     ##
## 2025                   ## 
############################

## Load packages ----

library(tidyverse)
library(lfe)
library(lmtest)
library(stargazer)
library(sandwich)
library(broom)
library(scales)
library(ggpubr)
library(grid)
library(kableExtra)
library(ggplot2)
library(weights)
library(panelView)
library(dotwhisker)
library(reshape2)
library(RColorBrewer) 
library(estimatr)
library(gridExtra)
# devtools::install_github("slowkow/ggrepel")
library(ggrepel)
library(ggeffects)  
library(fixest)
library(tidycensus)
library(Hmisc)

#
#### Load data and setup ----

df_even <- read.csv("./mc_cces_evens2.update.csv") 

#fix state_dist for 11-1 
df_even.1 <- df_even %>%
  mutate(state_dist==case_when(st==11 & dist==1 ~ "11-1",
                               TRUE ~ state_dist))
# Set up data for regressions with FEs 
rr_median <- median(df_even.1$rr_index, na.rm=T)
df_even2 <- df_even.1 %>% 
  mutate(cong = as.factor(cong),
         state_dist = as.factor(state_dist),
         #nonwhite_mc = as.factor(nonwhite_mc),
         inc_tercile_ideo1 = as.factor(inc_tercile_ideo1),
         inc_tercile_ideo2 = as.factor(inc_tercile_ideo2),
         #inc_tercile_pol = as.factor(inc_tercile_pol),
         mc_seniority = rescale(mc_seniority, to =c(0,1)),
         rr_index_mc = scale(rr_index, scale=FALSE),
         rr_abovemed = case_when(rr_index > rr_median~1,
                                 rr_index <= rr_median~0,
                                 T~NA_real_),
         race_correct_nw = case_when(nonwhite_mc==1&perceived_race%in%c(2,3,4,5)~1,
                                     nonwhite_mc==0&perceived_race==1~1,
                                     nonwhite_mc==1&perceived_race==1~0,
                                     nonwhite_mc==0&perceived_race%in%c(2,3,4,5)~0,
                                     T~NA_real_)) %>% 
  mutate(state_dist = paste0(st, "-", dist)) %>%
  group_by(state_dist, mc_party) %>% 
  mutate(dist_party_mc = cur_group_id()) %>% 
  ungroup() %>%
  #fixing something with black_mc_nw variable 
  mutate(black_mc_nw = case_when(nonwhite_mc == 1 & black_mc != 1 ~ NA_integer_,
                                 TRUE ~ black_mc_nw)) 

# Set up files for whites, Democratic whites, Republican whites, liberal whites, conservative whites 
df_even_w <- df_even2 %>%
  filter(nonwhite == 0)
df_even_w_dem <- df_even2 %>%
  filter(party7 == 1 & nonwhite==0)
df_even_w_rep <- df_even2 %>%
  filter(party7 == 3 & nonwhite==0)
df_even_w_lib <- df_even2 %>%
  filter(ideo_self < .5 & nonwhite==0)
df_even_w_con <- df_even2 %>%
  filter(ideo_self > .5 & nonwhite==0)

# setup with 2014-2022
df_even_w_dem.1422 <- df_even2 %>%
  filter(party7 == 1 & nonwhite==0 & year %in% c("2014", "2016", "2018", "2020", "2022"))
df_even_w_rep.1422 <- df_even2 %>%
  filter(party7 == 3 & nonwhite==0 & year %in% c("2014", "2016", "2018", "2020", "2022"))

#### PAPER ####
## Figure 1: RR over time ----
# restrict to D and R 
df_even_w.2 <- df_even_w %>%
  filter(party7==1 | party7==3) %>%
  mutate(party7 = as.character(party7))

# Calculate means and standard errors for white respondents
average.rr.w <- df_even_w.2 %>%
  select(rr_index, year, party7, weight) %>%
  filter(year != 2008) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = wtd.mean(rr_index, weights = weight, na.rm = TRUE),
    se = sqrt(wtd.var(rr_index, weights = weight, na.rm = TRUE) / sum(!is.na(rr_index))),
    n = sum(!is.na(rr_index)),
    .groups = "drop"
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7 == 1 ~ "White Democrats",
           party7 == 3 ~ "White Republicans"
         ))

# Create the plot with error bars and larger points
ggplot(average.rr.w, aes(year, avg., group=party7)) +
  # Add error bars
  geom_errorbar(aes(ymin = avg. - 1.96*se, 
                    ymax = avg. + 1.96*se,
                    color = party7),
                width = 0.4) +
  # Increase point size to 3
  geom_point(aes(color=party7, shape=party7), size = 3) + 
  geom_line(aes(color=party7)) +
  theme_minimal() +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024)) +
  scale_color_grey() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12, angle=0, vjust=.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        axis.text = element_text(size=10),
        panel.grid.minor = element_blank()) +
  labs(x = "Year", y = "Average Racial \nResentment") +
  ylim(0,1)

#
## Figure 2: Effect by year ---- 

# Models by year 
# Democratic respondent model by year 
mod1.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

# Republican respondent model by year 
mod2.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc:factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_rep, weights = df_even_w_rep$weight)

# create data frames with regression output of interest
mod1.yearly_df <- broom::tidy(mod1.yearly) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                "factor(year)2010:nonwhite_mc",
                                                                "factor(year)2012:nonwhite_mc",
                                                                "factor(year)2014:nonwhite_mc",
                                                                "factor(year)2016:nonwhite_mc",
                                                                "factor(year)2018:nonwhite_mc",
                                                                "factor(year)2020:nonwhite_mc",
                                                                "factor(year)2022:nonwhite_mc",
                                                                "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Democrats",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

mod2.yearly_df <- broom::tidy(mod2.yearly) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                "factor(year)2010:nonwhite_mc",
                                                                "factor(year)2012:nonwhite_mc",
                                                                "factor(year)2014:nonwhite_mc",
                                                                "factor(year)2016:nonwhite_mc",
                                                                "factor(year)2018:nonwhite_mc",
                                                                "factor(year)2020:nonwhite_mc",
                                                                "factor(year)2022:nonwhite_mc",
                                                                "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Republicans",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

# create tibble with all regression output
models <- rbind(mod1.yearly_df, mod2.yearly_df)

# plot coefficients
yearbyyear <- dwplot(models,
                     vline = geom_vline(
                       xintercept = 0,
                       colour = "grey60",
                       linetype = 2
                     ),
                     dot_args = list(aes(shape = model), size=3),
                     whisker_args = list(aes(color = model))) +
  theme_minimal() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on MC Approval by Constituent Party and Year") +
  xlab("Marginal Effect of \n POC MC on Approval") +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, angle=0, vjust = 0.5),
        axis.title.y = element_text(size = 12, angle=0, vjust = 0.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        axis.text = element_text(size=10),
        panel.grid.minor = element_blank())+
  scale_colour_manual(values = c("darkgrey", "black")) +
  scale_y_discrete(limits=rev) +
  ylab("Year") +
  coord_flip() +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))

yearbyyear

# testing difference between 2020 and 2022 
Z <- (mod1.yearly$coefficients[[17]] - mod1.yearly$coefficients[[16]])/(sqrt((mod1.yearly$se[[17]])^2 + (mod1.yearly$se[[16]])^2))
pval <- 2*pnorm(-abs(Z))
Eq.coeff <- rbind(Z, pval)
Eq.coeff


#
## Figure 3: Knowledge of MC race ----

df_high.knowledge_dem <- df_even_w_dem %>%
  filter(race_correct_nw==1)

df_low.knowledge_dem <- df_even_w_dem %>%
  filter(race_correct_nw==0)

## Basic approval for white dems over time 
# High Knowledge respondents 
mod1.hk <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                | dist_party_mc | 0 | state_dist, data=df_high.knowledge_dem,weights=df_high.knowledge_dem$weight)
# Low Knowledge respondents 
mod1.lk <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                | dist_party_mc | 0 | state_dist, data=df_low.knowledge_dem,weights=df_low.knowledge_dem$weight)

# Create data frames with regression output of interest
# High-Knowledge  
df_dem.hk <- broom::tidy(mod1.hk) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                       "factor(year)2010:nonwhite_mc",
                                                       "factor(year)2012:nonwhite_mc",
                                                       "factor(year)2014:nonwhite_mc",
                                                       "factor(year)2016:nonwhite_mc",
                                                       "factor(year)2018:nonwhite_mc",
                                                       "factor(year)2020:nonwhite_mc",
                                                       "factor(year)2022:nonwhite_mc",
                                                       "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Democrats",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))


# Low-knowledge
df_dem.lk <- broom::tidy(mod1.lk) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                       "factor(year)2010:nonwhite_mc",
                                                       "factor(year)2012:nonwhite_mc",
                                                       "factor(year)2014:nonwhite_mc",
                                                       "factor(year)2016:nonwhite_mc",
                                                       "factor(year)2018:nonwhite_mc",
                                                       "factor(year)2020:nonwhite_mc",
                                                       "factor(year)2022:nonwhite_mc",
                                                       "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Democrats",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

# Need to bind the datasets for face_grid 
models.hk.dem <- df_dem.hk %>% mutate(knowledge = "Accurately Identify MC Race")
models.lk.dem <- df_dem.lk %>% mutate(knowledge = "Inaccurately Identify MC Race")

models.knowledge2 <- rbind(models.lk.dem, models.hk.dem)

mycol<-brewer.pal(5,"Greys")[c(2,5)]

models.knowledge2$knowledge <- factor(models.knowledge2$knowledge, 
                                      levels = c("Accurately Identify MC Race",
                                                 "Inaccurately Identify MC Race"))

knowledge_plot <- dwplot(models.knowledge2,
                         vline = geom_vline(
                           xintercept = 0,
                           colour = "grey60",
                           linetype = 2
                         ),
                         dot_args = list(aes(shape = model), size=3),
                         whisker_args = list(aes(color = model))) +
  theme_bw() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on Approval by Knowledge Level") +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12, angle=0, vjust=.5),
    legend.text = element_text(size = 10),
    legend.position = "none",
    strip.text = element_text(face="bold", size=12),
    strip.background = element_blank(),
    panel.grid.minor = element_blank()) +
  scale_colour_manual(values = c("black")) +
  scale_y_discrete(limits=rev) +
  xlab("Marginal Effect of\n POC MC on Approval") +
  ylab("Year") +
  coord_flip() +
  facet_grid(scales = "free_y", ~knowledge)+
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))

knowledge_plot

#
## Figure 4: demographics ----
## By respondent gender

# generate 2 subsets for men and women Democrats
df_even_w_dem_women <- df_even_w_dem %>%
  filter(gender==1)
df_even_w_dem_men <- df_even_w_dem %>%
  filter(gender==0)

## bivariate + district-level controls by year 
# Women Dems 
mod.women.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_women, weights=df_even_w_dem_women$weight)
# Men Dems 
mod.men.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                   | dist_party_mc | 0 | state_dist, data=df_even_w_dem_men, weights=df_even_w_dem_men$weight)


# generate table of values to plot 
mod.women_yr_df <- broom::tidy(mod.women.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                  "factor(year)2010:nonwhite_mc",
                                                                  "factor(year)2012:nonwhite_mc",
                                                                  "factor(year)2014:nonwhite_mc",
                                                                  "factor(year)2016:nonwhite_mc",
                                                                  "factor(year)2018:nonwhite_mc",
                                                                  "factor(year)2020:nonwhite_mc",
                                                                  "factor(year)2022:nonwhite_mc",
                                                                  "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Women",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

mod.men_yr_df <- broom::tidy(mod.men.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                              "factor(year)2010:nonwhite_mc",
                                                              "factor(year)2012:nonwhite_mc",
                                                              "factor(year)2014:nonwhite_mc",
                                                              "factor(year)2016:nonwhite_mc",
                                                              "factor(year)2018:nonwhite_mc",
                                                              "factor(year)2020:nonwhite_mc",
                                                              "factor(year)2022:nonwhite_mc",
                                                              "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Men",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

# create tibble with all regression output
models.gender <- rbind(mod.women_yr_df, mod.men_yr_df)

# plot coefficients
year.gender <- dwplot(models.gender,
                      vline = geom_vline(
                        xintercept = 0,
                        colour = "grey60",
                        linetype = 3
                      ),
                      dot_args = list(aes(shape = model), size=3),
                      whisker_args = list(aes(color = model))) +
  theme_bw() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on MC Approval by Respondent Gender and Year") +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 10, face="bold", hjust=.5),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        panel.grid.minor = element_blank())+
  scale_colour_manual(values = c("darkgrey", "black")) +
  scale_y_discrete(limits=rev) +
  #xlab("Marginal Effect of \nPOC MC on Approval") +
  xlab("") +
  ylab("Year") +
  ggtitle("By Gender") +
  coord_flip() +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model")) +
  xlim(-.2, .15)

year.gender

# By respondent age cohort (https://www.pewtrusts.org/en/research-and-analysis/data-visualizations/2019/defining-our-six-generations)

fig.age <- df_even_w_dem %>%
  mutate(generation = case_when(birthyr < 1928~"Greatest Generation",
                                birthyr < 1946~"Silent Generation",
                                birthyr < 1965~"Baby Boomers",
                                birthyr < 1981~"Generation X",
                                birthyr < 1997~"Millennials",
                                birthyr >= 1997~"Generation Z"),
         generation = factor(generation,
                             levels = c("Greatest Generation",
                                        "Silent Generation",
                                        "Baby Boomers",
                                        "Generation X",
                                        "Millennials",
                                        "Generation Z"),
                             ordered = T)) %>%
  filter(!(generation%in%c("Greatest Generation", "Generation Z"))) %>%
  group_by(generation) %>%
  do(tidy(felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
               | dist_party_mc | 0 | state_dist, weights = .$weight, data=.))) %>%
  filter(term%in%c("factor(year)2008:nonwhite_mc",
                   "factor(year)2010:nonwhite_mc",
                   "factor(year)2012:nonwhite_mc",
                   "factor(year)2014:nonwhite_mc",
                   "factor(year)2016:nonwhite_mc",
                   "factor(year)2018:nonwhite_mc",
                   "factor(year)2020:nonwhite_mc",
                   "factor(year)2022:nonwhite_mc",
                   "factor(year)2024:nonwhite_mc")) %>%
  rename(model = generation) %>%
  mutate(term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"),
         upper = estimate + 1.96 * std.error,
         lower = estimate - 1.96 * std.error) %>% ggplot(aes(x = term, y = estimate)) +
  geom_hline(yintercept = 0, color = "grey60", linetype = 3) +
  scale_colour_grey() +
  geom_point(aes(shape = model, color = model), size = 3, position = position_dodge(0.5)) +
  geom_errorbar(aes(color = model, ymin = lower, ymax = upper), width = 0, position = position_dodge(0.5)) +
  theme_bw() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 10, face="bold", hjust=.5),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        panel.grid.minor = element_blank())+
  #ylab("Marginal Effect of \nPOC MC on Approval") +
  ylab("") +
  ggtitle("By Generation") +
  xlab("Year") +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))+
  ylim(-.2, .15)

fig.age

# by educational attainment

fig.educ <- df_even_w_dem %>%
  mutate(educ3 = case_when(educ%in%c(1,2)~"HS or less",
                           educ%in%c(3,4)~"Some college",
                           educ%in%c(5,6)~"BA+"),
         educ3 = factor(educ3,
                        levels = c("HS or less",
                                   "Some college",
                                   "BA+"),
                        ordered = T)) %>%
  group_by(educ3) %>%
  do(tidy(felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
               | dist_party_mc | 0 | state_dist, weights = .$weight, data=.))) %>%
  filter(term%in%c("factor(year)2008:nonwhite_mc",
                   "factor(year)2010:nonwhite_mc",
                   "factor(year)2012:nonwhite_mc",
                   "factor(year)2014:nonwhite_mc",
                   "factor(year)2016:nonwhite_mc",
                   "factor(year)2018:nonwhite_mc",
                   "factor(year)2020:nonwhite_mc",
                   "factor(year)2022:nonwhite_mc",
                   "factor(year)2024:nonwhite_mc")) %>%
  rename(model = educ3) %>%
  mutate(term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"),
         upper = estimate + 1.96 * std.error,
         lower = estimate - 1.96 * std.error) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_hline(yintercept = 0, color = "grey60", linetype = 3) +
  geom_point(aes(shape = model, color = model), size = 3, position = position_dodge(0.5)) +
  geom_errorbar(aes(color = model, ymin = lower, ymax = upper), width = 0, position = position_dodge(0.5)) +
  theme_bw() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 10, face="bold", hjust=.5),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        panel.grid.minor = element_blank())+
  scale_colour_grey() +
  #ylab("Marginal Effect of \nPOC MC on Approval") +
  ylab("") +
  ggtitle("By Education") +
  xlab("Year") +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))+
  ylim(-.2, .15)

fig.educ

# by above-below median household income

fig.income <- df_even_w_dem %>%
  filter(!is.na(faminc)) %>%
  group_by(year) %>%
  mutate(above_median = case_when(faminc > median(faminc) ~ 1, T~0)) %>%
  ungroup() %>%
  group_by(above_median) %>%
  do(tidy(felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
               | dist_party_mc | 0 | state_dist, weights = .$weight, data=.))) %>%
  filter(term%in%c("factor(year)2008:nonwhite_mc",
                   "factor(year)2010:nonwhite_mc",
                   "factor(year)2012:nonwhite_mc",
                   "factor(year)2014:nonwhite_mc",
                   "factor(year)2016:nonwhite_mc",
                   "factor(year)2018:nonwhite_mc",
                   "factor(year)2020:nonwhite_mc",
                   "factor(year)2022:nonwhite_mc",
                   "factor(year)2024:nonwhite_mc")) %>%
  mutate(term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"),
         upper = estimate + 1.96 * std.error,
         lower = estimate - 1.96 * std.error,
         model = case_when(above_median==0~"Below median income",
                           above_median==1~"Above median income")) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_hline(yintercept = 0, color = "grey60", linetype = 3) +
  geom_point(aes(shape = model, color = model), size = 3, position = position_dodge(0.5)) +
  geom_errorbar(aes(color = model, ymin = lower, ymax = upper), width = 0, position = position_dodge(0.5)) +
  theme_bw() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 10, face="bold", hjust=.5),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        panel.grid.minor = element_blank())+
  scale_colour_grey() +
  #ylab("Marginal Effect of \nPOC MC on Approval") +
  ylab("")+
  ggtitle("By Above/Below−Median Income") +
  xlab("Year") +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))+
  ylim(-.2, .15)

fig.income

# combine plots 
combined_plots <- ggarrange(year.gender, fig.age, fig.educ, fig.income)

# add acis
annotate_figure(combined_plots,
                left = text_grob("Marginal Effect of \nPOC MC on Approval", 
                                 rot = 0, 
                                 size = 10))

#
## Table 1: Both RR x POC MC and FIRE x POC MC, main table ---- 

# Start with Racial Resentment 
# flip index 
df_even_w_dem.index <- df_even_w_dem %>%
  mutate(rr_index_flip = 1-rr_index)

# interact POC MC*rr with district-level controls for White Dems 
mod_rr1_flip <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)

mod_rr3_flip <- felm(approval_rep ~ rr_index_flip + rr_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)

# calculate number of treated districts in each dataset
temp <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp3 <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated3 <- temp3 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod3_n <- nrow(treated3)


# Now the FIRE scales 
# flip index 
df_even_w_dem.fire <- df_even_w_dem %>%
  # making it so higher values are more liberal (initial coding in cleaning was high values as conservative)
  mutate(fire1_index_flip = 1-fire1,
         fire2_index_flip = 1-fire2) 

# interact POC MC*rr with district-level controls for White Dems 
mod1_fire1_flip <- felm(approval_rep ~ fire1_index_flip + fire1_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod2_fire1_flip <- felm(approval_rep ~ fire1_index_flip + fire1_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod3_fire2_flip <- felm(approval_rep ~ fire2_index_flip + fire2_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod4_fire2_flip <- felm(approval_rep ~ fire2_index_flip + fire2_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

# calculate number of treated districts in each dataset
temp4 <- df_even_w_dem.fire %>%
  filter(!is.na(fire1)&!is.na(fire2)&!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated4 <- temp4 %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod4_n <- nrow(treated4)

temp5 <- df_even_w_dem.fire %>%
  filter(!is.na(fire1)&!is.na(fire2)&!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated5 <- temp5 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod5_n <- nrow(treated5)

# table
models_rrFIRE <- list(mod_rr1_flip, mod1_fire1_flip,mod3_fire2_flip,
                      mod_rr3_flip, mod2_fire1_flip, mod4_fire2_flip)

stargazer(models_rrFIRE,
          type="latex", 
          model.numbers=F, style="apsr",
          omit = c("mc_seniority", "mc_gender"),
          header=F, title = "Effects of POC/Black MC on MC Approval, Interacting MC Race with Racial Resentment and FIRE Scale (White Respondents)",
          column.labels=c("POC MC", "Black MC"), 
          covariate.labels=c("Reverse-scaled Racial Resentment", "Reverse-scaled FIRE 1",
                             "Reverse-scaled FIRE 2", "POC MC", "Black MC", "POC MC x Reverse-scaled RR",
                             "POC MC x Reverse-scaled FIRE 1",
                             "POC MC x Reverse-scaled FIRE 2",
                             "Black MC x Reverse-scaled RR", 
                             "Black MC x Reverse-scaled FIRE 1",
                             "Black MC x Reverse-scaled FIRE 2"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y", "Y", "Y", "Y", "Y"),
                           c("Congressional session FEs", "Y", "Y", "Y", "Y", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n, mod4_n,mod4_n , mod3_n, mod5_n, mod5_n)), 
          dep.var.labels=c("MC approval"), font.size="small", 
          omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)

# For text 
# correlation between FIRE1 & RR, FIRE2 & RR

cor.test(df_even_w_dem$fire1,df_even_w_dem$rr_index)
cor.test(df_even_w_dem$fire2,df_even_w_dem$rr_index)

# 
## Figure 5: RR/FIRE x POC MC predicted ----

# Create flipped index
df_even_w_dem.index <- df_even_w_dem %>%
  mutate(rr_index_flip = 1 - rr_index)

df_even_w_dem.fire <- df_even_w_dem %>%
  # making it so higher values are more liberal (initial coding in cleaning was high values as conservative)
  mutate(fire1_index_flip = 1-fire1,
         fire2_index_flip = 1-fire2) 

# Function to create clustered standard errors
cluster_se <- function(model, cluster_var) {
  require(sandwich)
  require(lmtest)
  
  # Calculate clustered standard errors
  clustered.se <- sqrt(diag(vcovCL(model, cluster = cluster_var)))
  return(clustered.se)
}

# Run models with fixed effects using lm()
mod_rr1_flip <- lm(approval_rep ~ rr_index_flip + rr_index_flip:nonwhite_mc + 
                     nonwhite_mc + mc_seniority + mc_gender + 
                     factor(cong) + factor(dist_party_mc), 
                   data = df_even_w_dem.index, 
                   weights = weight)

mod_rr3_flip <- lm(approval_rep ~ rr_index_flip + rr_index_flip:black_mc_nw + 
                     black_mc_nw + mc_seniority + mc_gender + 
                     factor(cong) + factor(dist_party_mc), 
                   data = df_even_w_dem.index, 
                   weights = weight)

# FIRE 1
mod_fire1_flip_poc <- lm(approval_rep ~ fire1_index_flip + fire1_index_flip:nonwhite_mc + 
                     nonwhite_mc + mc_seniority + mc_gender + 
                     factor(cong) + factor(dist_party_mc), 
                   data = df_even_w_dem.fire, 
                   weights = weight)

mod_fire1_flip_bl <- lm(approval_rep ~ fire1_index_flip + fire1_index_flip:black_mc_nw + 
                     black_mc_nw + mc_seniority + mc_gender + 
                     factor(cong) + factor(dist_party_mc), 
                   data = df_even_w_dem.fire, 
                   weights = weight)

# FIRE 2
mod_fire2_flip_poc <- lm(approval_rep ~ fire2_index_flip + fire2_index_flip:nonwhite_mc + 
                           nonwhite_mc + mc_seniority + mc_gender + 
                           factor(cong) + factor(dist_party_mc), 
                         data = df_even_w_dem.fire, 
                         weights = weight)

mod_fire2_flip_bl <- lm(approval_rep ~ fire2_index_flip + fire2_index_flip:black_mc_nw + 
                          black_mc_nw + mc_seniority + mc_gender + 
                          factor(cong) + factor(dist_party_mc), 
                        data = df_even_w_dem.fire, 
                        weights = weight)


# Function to generate predictions with confidence intervals
generate_predictions <- function(model, newdata, se_clustered) {
  # Get predicted values
  pred <- predict(model, newdata = newdata, se.fit = TRUE)
  
  # Calculate confidence intervals
  critical_value <- qt(0.975, df = model$df.residual)
  
  # Create prediction dataframe with confidence intervals
  pred_df <- data.frame(
    fit = pred$fit,
    lwr = pred$fit - critical_value * pred$se.fit,
    upr = pred$fit + critical_value * pred$se.fit
  )
  
  return(pred_df)
}

# Generate sequence of values
set.seed(123)
rr_values <- seq(min(df_even_w_dem.index$rr_index_flip, na.rm = TRUE), 
                 max(df_even_w_dem.index$rr_index_flip, na.rm = TRUE), 
                 length.out = 100)

fire1_values <- seq(min(df_even_w_dem.fire$fire1_index_flip, na.rm = TRUE), 
                 max(df_even_w_dem.fire$fire1_index_flip, na.rm = TRUE), 
                 length.out = 100)

fire2_values <- seq(min(df_even_w_dem.fire$fire2_index_flip, na.rm = TRUE), 
                    max(df_even_w_dem.fire$fire2_index_flip, na.rm = TRUE), 
                    length.out = 100)

# Create newdata for predictions
newdata1 <- expand.grid(
  rr_index_flip = rr_values,
  nonwhite_mc = c(0, 1),
  mc_seniority = mean(df_even_w_dem.index$mc_seniority, na.rm = TRUE),
  mc_gender = 0
)

# Add dummy variables for fixed effects (using modal values)
modal_congress <- as.numeric(names(sort(table(df_even_w_dem.index$cong), decreasing = TRUE)[1]))
modal_dist_party <- as.numeric(names(sort(table(df_even_w_dem.index$dist_party_mc), decreasing = TRUE)[1]))

newdata1$cong <- modal_congress
newdata1$dist_party_mc <- modal_dist_party

newdata2 <- expand.grid(
  rr_index_flip = rr_values,
  black_mc_nw = c(0, 1),
  mc_seniority = mean(df_even_w_dem.index$mc_seniority, na.rm = TRUE),
  mc_gender = 0
)
newdata2$cong <- modal_congress
newdata2$dist_party_mc <- modal_dist_party

newdata3 <- expand.grid(
  fire1_index_flip = fire1_values,
  nonwhite_mc = c(0, 1),
  mc_seniority = mean(df_even_w_dem.fire$mc_seniority, na.rm = TRUE),
  mc_gender = 0
)

newdata3$cong <- modal_congress
newdata3$dist_party_mc <- modal_dist_party

newdata4 <- expand.grid(
  fire1_index_flip = fire1_values,
  black_mc_nw = c(0, 1),
  mc_seniority = mean(df_even_w_dem.fire$mc_seniority, na.rm = TRUE),
  mc_gender = 0
)

newdata4$cong <- modal_congress
newdata4$dist_party_mc <- modal_dist_party

newdata5 <- expand.grid(
  fire2_index_flip = fire2_values,
  nonwhite_mc = c(0, 1),
  mc_seniority = mean(df_even_w_dem.fire$mc_seniority, na.rm = TRUE),
  mc_gender = 0
)

newdata5$cong <- modal_congress
newdata5$dist_party_mc <- modal_dist_party

newdata6 <- expand.grid(
  fire2_index_flip = fire2_values,
  black_mc_nw = c(0, 1),
  mc_seniority = mean(df_even_w_dem.fire$mc_seniority, na.rm = TRUE),
  mc_gender = 0
)

newdata6$cong <- modal_congress
newdata6$dist_party_mc <- modal_dist_party


# Generate predictions
pred1 <- generate_predictions(mod_rr1_flip, newdata1, 
                              cluster_se(mod_rr1_flip, df_even_w_dem.index$state_dist))
pred2 <- generate_predictions(mod_rr3_flip, newdata2, 
                              cluster_se(mod_rr3_flip, df_even_w_dem.index$state_dist))
pred3 <- generate_predictions(mod_fire1_flip_poc, newdata3, 
                              cluster_se(mod_fire1_flip_poc, df_even_w_dem.fire$state_dist))
pred4 <- generate_predictions(mod_fire1_flip_bl, newdata4, 
                              cluster_se(mod_fire1_flip_bl, df_even_w_dem.fire$state_dist))
pred5 <- generate_predictions(mod_fire2_flip_poc, newdata5, 
                              cluster_se(mod_fire2_flip_poc, df_even_w_dem.fire$state_dist))
pred6 <- generate_predictions(mod_fire2_flip_bl, newdata6, 
                              cluster_se(mod_fire2_flip_bl, df_even_w_dem.fire$state_dist))

# Combine predictions into final dataset
pred_tidy1 <- cbind(newdata1, pred1) %>%
  mutate(type = "POC vs. White MCs",
         attitude = "Racial Resentment",
         index = rr_index_flip) %>%
  rename(mc.race = nonwhite_mc)

pred_tidy2 <- cbind(newdata2, pred2) %>%
  mutate(type = "Black vs. White MCs",
         attitude = "Racial Resentment",
         index = rr_index_flip) %>%
  rename(mc.race = black_mc_nw)

pred_tidy3 <- cbind(newdata3, pred3) %>%
  mutate(type = "POC vs. White MCs",
         attitude = "FIRE 1",
         index = fire1_index_flip) %>%
  rename(mc.race = nonwhite_mc)

pred_tidy4 <- cbind(newdata4, pred4) %>%
  mutate(type = "Black vs. White MCs",
         attitude = "FIRE 1",
         index = fire1_index_flip) %>%
  rename(mc.race = black_mc_nw)

pred_tidy5 <- cbind(newdata5, pred5) %>%
  mutate(type = "POC vs. White MCs",
         attitude = "FIRE 2",
         index = fire2_index_flip) %>%
  rename(mc.race = nonwhite_mc)

pred_tidy6 <- cbind(newdata6, pred6) %>%
  mutate(type = "Black vs. White MCs",
         attitude = "FIRE 2",
         index = fire2_index_flip) %>%
  rename(mc.race = black_mc_nw)

# Combine all predictions
pred.tidy.all <- bind_rows(pred_tidy1, pred_tidy2,
                           pred_tidy3, pred_tidy4,
                           pred_tidy5, pred_tidy6)

# relabeling 
pred.tidy.all$mc.race <- ifelse(pred.tidy.all$mc.race==1, "POC/Black MC", "White MC")

# reordering 
pred.tidy.all$type <- factor(pred.tidy.all$type, levels = c("POC vs. White MCs", "Black vs. White MCs"))

pred.tidy.all$attitude <- factor(pred.tidy.all$attitude, levels = c("Racial Resentment", "FIRE 1", "FIRE 2"))


# Plot predicted values
ggplot(pred.tidy.all, aes(x = index, y = fit,
                          color = factor(mc.race))) +
  geom_line(size = .25) +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = factor(mc.race)), alpha = 0.3) +
  labs(x = "Racial Resentment Index/FIRE Scale\n(From most racially conservative to most racially liberal)", 
       y = "Predicted approval of...") +
  theme_bw() + 
  scale_color_grey() +
  scale_fill_grey() +
  facet_grid(type ~ attitude, switch = "y") +
  theme(legend.title = element_blank(),
        legend.position = "bottom",
        #axis.title.y = element_text(angle = 0, vjust = 0.5),
        strip.text.x.top = element_text(size = 10, face="bold"),
        strip.text.y = element_text(angle=0, face="bold"),
        panel.spacing = unit(1.25, "lines"),
        axis.text.x = element_text(angle=15),
        strip.placement = "outside",
        strip.background = element_blank(),
        panel.grid.minor = element_blank()) +
  scale_x_continuous(
    limits = c(0, 1),
    breaks = c(0, 1),
    labels = c("Conservative", "Liberal")
  ) +
  ylim(0.25, 1)

#
## Figure 6: Perceived incongruence for POC and White MCs ----

# restrict to D and R 
df_even_w.2 <- df_even_w_dem %>%
  filter(mc_party==0 | mc_party==1) %>%
  mutate(mc_party = as.character(mc_party)) %>%
  filter(nonwhite_mc==0 | nonwhite_mc==1) %>%
  mutate(nonwhite_mc = as.character(nonwhite_mc))

avg.perceived.con <- df_even_w.2 %>%
  select(incongruence_ideo2, year, mc_party, nonwhite_mc) %>%
  filter(year != 2008) %>%
  group_by(year, mc_party, nonwhite_mc) %>%
  summarise(avg. = mean(incongruence_ideo2, na.rm = TRUE),
            ci_low = mean(incongruence_ideo2, na.rm = TRUE) - 1.96 * 
              (sd(incongruence_ideo2, na.rm = TRUE) / sqrt(n())),
            ci_high = mean(incongruence_ideo2, na.rm = TRUE) + 1.96 * 
              (sd(incongruence_ideo2, na.rm = TRUE) / sqrt(n()))) %>%
  pivot_wider( names_from = nonwhite_mc,
               values_from = c(avg., ci_low, ci_high))  %>%
  mutate(mc_party = case_when(mc_party==1 ~ "Democratic MCs",
                              mc_party==0 ~ "Republican MCs"),
         dif.race = avg._1 - avg._0)


ggplot(avg.perceived.con, aes(x = year, y = dif.race, group = mc_party)) +
  geom_hline(yintercept = 0, colour = "grey60", linetype = 2) +
  geom_point(aes(color = mc_party), size = 2) +
  geom_line(aes(color = mc_party)) +
  geom_errorbar(
    aes(
      x = year,
      ymin = dif.race - (ci_high_1 - ci_low_1) / 2,
      ymax = dif.race + (ci_high_1 - ci_low_1) / 2,
      color = mc_party
    ), width = 0.125, size = .5) +
  theme_minimal() +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024)) +
  scale_colour_manual(values = c("black", "grey60")) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 12, angle=0, vjust=.5),
    legend.text = element_text(size = 10),
    legend.position = "bottom",
    legend.margin = margin(0),
    legend.box.margin = margin(-10),
    panel.grid.minor = element_blank()
  ) +
  labs(x = "Year", y = "Difference in average perceived \nideological incongruence\n(POC MCs - white MCs)") +
  ylim(-0.15, 0.15) 

#
## Figure 7: Effectiveness ----
# Panel A: Effectiveness over time (among respondents)---- 

# restrict to D and R 
df_even_w.2 <- df_even_w %>%
  filter(party7==1 | party7==3) %>%
  mutate(party7 = as.character(party7)) %>%
  # removing effectiveness over 10 for now 
  #filter(mc_effectiveness_new < 10) %>%
  # rescale effectiveness to 0-1 
  mutate(effectiveness_scale = scales::rescale(mc_effectiveness_new))

# Calculate means and standard errors for white respondents
average.effect.w <- df_even_w.2 %>%
  #filter(mc_effectiveness_new < 5) %>%
  select(mc_effectiveness_new, year, party7, nonwhite_mc) %>%
  filter(year != 2008) %>%
  group_by(year, party7, nonwhite_mc) %>%
  summarise(
    avg. = median(mc_effectiveness_new, na.rm = T),
    se = sd(mc_effectiveness_new, na.rm = T) / sqrt(n()),
    n = n()
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7==1 ~ "White Democratic Respondents",
           party7==3 ~ "White Republican Respondents"),
         nonwhite_mc = case_when(nonwhite_mc==1 ~ "POC MC",
                                 nonwhite_mc==0 ~ "White MC",
                                 TRUE ~ NA_character_)) %>%
  filter(!is.na(nonwhite_mc))

# Create the plot with error bars and larger points
PanelA <- ggplot(average.effect.w, aes(year, avg., group=factor(nonwhite_mc))) +
  # Add error bars
  geom_errorbar(aes(ymin = avg. - 1.96*se, 
                    ymax = avg. + 1.96*se,
                    color = factor(nonwhite_mc)),
                width = 0.3) +
  # Increase point size to 3
  geom_point(aes(color=factor(nonwhite_mc), shape=factor(nonwhite_mc)), size = 2) + 
  geom_line(aes(color=factor(nonwhite_mc))) +
  theme_bw() + facet_wrap(~party7)+
  scale_x_continuous(breaks = c(2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024)) +
  scale_color_grey() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12, angle=0, vjust=.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        strip.background = element_blank(),
        panel.grid.minor = element_blank(),
        strip.text = element_text(face="bold")) +
  labs(x = "Year", y = "Median MC\nEffectiveness\n(0-18)",
       title = "Panel A: MCs representing CCES respondents") +
  ylim(0,5)


#
# Panel B: Effectiveness over time (raw MCs)---- 
df.mc <- read.csv("./Full_MCdata_110-118.v2.csv")


# restrict to D and R 
df.mc2 <- df.mc %>%
  # rescale effectiveness to 0-1 
  mutate(effectiveness_scale = scales::rescale(mc_effectiveness_new))

# Calculate means and standard errors for white respondents
average.effect.mc <- df.mc2 %>%
  select(mc_effectiveness_new, mc_year, nonwhite_mc) %>%
  #filter(mc_year != 2008 & mc_year != 2022) %>%
  mutate(year = mc_year + 1) %>%
  group_by(year, nonwhite_mc) %>%
  summarise(
    avg. = median(mc_effectiveness_new, na.rm = T),
    se = sd(mc_effectiveness_new, na.rm = T) / sqrt(n()),
    n = n()
  ) %>%
  mutate(nonwhite_mc = case_when(nonwhite_mc==1 ~ "POC MC",
                                 nonwhite_mc==0 ~ "White MC"))

# Create the plot with error bars and larger points
PanelB <- ggplot(average.effect.mc, aes(year, avg., color=nonwhite_mc)) +
  # Add error bars
  geom_errorbar(aes(ymin = avg. - 1.96*se, 
                    ymax = avg. + 1.96*se, color=nonwhite_mc),
                width = 0.3) +
  # Increase point size to 3
  geom_point(size = 2) + 
  geom_line(aes(color=nonwhite_mc)) +
  theme_bw() + #facet_wrap(~nonwhite_mc)+
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022,2024)) +
  scale_color_grey() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12, angle=0, vjust=.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        panel.grid.minor = element_blank(),
        strip.background = element_blank()) +
  labs(x = "Year", y = "Median MC\nEffectiveness\n(0-18)", 
       title="Panel B: MC data only") +
  ylim(0,5)

# A&B together ---- 
ggarrange(PanelA, PanelB, ncol=1, common.legend = TRUE, legend = "bottom")

#
#### APPENDIX #### 
## Appendix A: Descriptive Statistics ----
# A1: Number of white CES respondents by race of MC ----
bl_num <- as.matrix(table(df_even_w$cong, df_even_w$black_mc_nw))
pct_bl <- as.vector((bl_num[,2]/(bl_num[,1] + bl_num[,2]))*100)
bl_num2 <- cbind(bl_num[,2], pct_bl)
colnames(bl_num2) <- c("Black", "Pct Black")

h_num <- as.matrix(table(df_even_w$cong, df_even_w$hispanic_mc_nw))
pct_h <- as.vector((h_num[,2]/(h_num[,1] + h_num[,2]))*100)
h_num2 <- cbind(h_num[,2], pct_h)
colnames(h_num2) <- c("Hisp.", "Pct Hisp.")

a_num <- as.matrix(table(df_even_w$cong, df_even_w$asian_mc_nw))
pct_a <- as.vector((a_num[,2]/(a_num[,1] + a_num[,2]))*100)
a_num2 <- cbind(a_num[,2], pct_a)
colnames(a_num2) <- c("Asian", "Pct Asian")

w_num <- as.matrix(table(df_even_w$cong, df_even_w$nonwhite_mc))
pct_w <- as.vector((w_num[,1]/(w_num[,1] + w_num[,2]))*100)
w_num2 <- cbind(w_num[,1], pct_w)
colnames(w_num2) <- c("White", "Pct White")

race_cong <- cbind(bl_num2, h_num2, a_num2, w_num2)
rownames(race_cong) <- c("110th Congress", "111th Congress","112th Congress",
                         "113th Congress","114th Congress","115th Congress", 
                         "116th Congress", "117th Congress", "118th Congress")
stargazer(race_cong, type = "latex", title="Number of white CES respondents by race of MC", digits=2, header=F, style="apsr")

#
# A2: Number of white CES Respondents by Party ----
cong_num <- as.matrix(table(df_even_w$cong))
cong_num2 <- cbind(cong_num[,1])
colnames(cong_num2) <- c("N")
rownames(cong_num2) <- c("110th Congress", "111th Congress","112th Congress",
                         "113th Congress","114th Congress","115th Congress",
                         "116th Congress", "117th Congress", "118th Congress")

rparty_num <- as.matrix(table(df_even_w$cong, df_even_w$party7))
colnames(rparty_num) <- c("Democratic Respondents", "Independent/Other Respondents", "Republican Respondents")
rownames(rparty_num) <- c("110th Congress", "111th Congress","112th Congress",
                          "113th Congress","114th Congress","115th Congress", "116th Congress",
                          "117th Congress", "118th Congress")
rpct_D <- as.vector((rparty_num[,1]/(rparty_num[,1] + rparty_num[,2] + rparty_num[,3]))*100)
rpct_Ind <- as.vector((rparty_num[,2]/(rparty_num[,1] + rparty_num[,2]+ rparty_num[,3]))*100)
rpct_R <- as.vector((rparty_num[,3]/(rparty_num[,1] + rparty_num[,2]+ rparty_num[,3]))*100)
rparty_num2 <- cbind(rparty_num[,1], rpct_D, rparty_num[,2], rpct_Ind, rparty_num[,3], rpct_R)
colnames(rparty_num2) <- c("Dem.", "Pct D", "Ind./Other", "Pct I",
                           "Rep.", "Pct R")

cong_party <- cbind(cong_num2,rparty_num2)
stargazer(cong_party, type = "latex", title="Number of white CES Respondents by Party", digits=2, header=F, style="apsr")

#
# A3&A4: White respondents with D/R MCs in each racial group ----
# create datasets of MC party7 (still with only white respondents)
df_even_MCdem_rw <- df_even_w %>%
  filter(mc_party == 1)

df_even_MCrep_rw <- df_even_w %>%
  filter(mc_party == 0)

bl_num_D <- as.matrix(table(df_even_MCdem_rw$cong, df_even_MCdem_rw$black_mc_nw))
bl_num_R <- as.matrix(table(df_even_MCrep_rw$cong, df_even_MCrep_rw$black_mc_nw))
h_num_D <- as.matrix(table(df_even_MCdem_rw$cong, df_even_MCdem_rw$hispanic_mc_nw))
h_num_R <- as.matrix(table(df_even_MCrep_rw$cong, df_even_MCrep_rw$hispanic_mc_nw))
a_num_D <- as.matrix(table(df_even_MCdem_rw$cong, df_even_MCdem_rw$asian_mc_nw))
a_num_R <- as.matrix(table(df_even_MCrep_rw$cong, df_even_MCrep_rw$asian_mc_nw))
nw_num_D <- as.matrix(table(df_even_MCdem_rw$cong, df_even_MCdem_rw$nonwhite_mc))
nw_num_R <- as.matrix(table(df_even_MCrep_rw$cong, df_even_MCrep_rw$nonwhite_mc))

pct_bl_num_D <- as.vector((bl_num_D[,2]/(bl_num_D[,1] + bl_num_D[,2]))*100)
pct_bl_num_R <- as.vector((bl_num_R[,2]/(bl_num_R[,1] + bl_num_R[,2]))*100)
pct_h_num_D <- as.vector((h_num_D[,2]/(h_num_D[,1] + h_num_D[,2]))*100)
pct_h_num_R <- as.vector((h_num_R[,2]/(h_num_R[,1] + h_num_R[,2]))*100)
pct_a_num_D <- as.vector((a_num_D[,2]/(a_num_D[,1] + a_num_D[,2]))*100)
pct_a_num_R <- as.vector((a_num_R[,2]/(a_num_R[,1] + a_num_R[,2]))*100)
pct_w_num_D <- as.vector((nw_num_D[,1]/(nw_num_D[,1] + nw_num_D[,2]))*100)
pct_w_num_R <- as.vector((nw_num_R[,1]/(nw_num_R[,1] + nw_num_R[,2]))*100)

d_party_race <- cbind(bl_num_D[,2], pct_bl_num_D,
                      h_num_D[,2], pct_h_num_D,
                      a_num_D[,2], pct_a_num_D,
                      nw_num_D[,1], pct_w_num_D)
r_party_race <- cbind(bl_num_R[,2], pct_bl_num_R,
                      h_num_R[,2], pct_h_num_R,
                      a_num_R[,2], pct_a_num_R,
                      nw_num_R[,1], pct_w_num_R)
colnames(d_party_race) <- c("Black D", "Pct. BD", "Hisp. D", "Pct. HD", 
                            "Asian D", "Pct. AD", "White D", "Pct. WD")
colnames(r_party_race) <- c("Black R", "Pct. BR", "Hisp. R", "Pct. HR",
                            "Asian R", "Pct. AR", "White R", "Pct. WR")

rownames(d_party_race) <- c("110th Congress", "111th Congress","112th Congress",
                    "113th Congress","114th Congress","115th Congress", 
                    "116th Congress", "117th Congress", "118th Congress")

rownames(r_party_race) <- c("110th Congress", "111th Congress","112th Congress",
                            "113th Congress","114th Congress","115th Congress", 
                            "116th Congress", "117th Congress", "118th Congress")


stargazer(d_party_race, type = "latex", title="White respondents with Democratic MCs in each racial group", digits=2, header=F, style = "apsr")

stargazer(r_party_race, type = "latex", title="White respondents with Republican MCs in each racial group", digits=2, header=F, style = "apsr")

#
# A5: MC Race by Party and Congress, 2008-2020 ----

# Generate a table with the number of nonwhite MCs by party and year
racebyyear_tab <- df_even_w %>%
  mutate(nonwhite_mc = as.numeric(as.character(nonwhite_mc)),
         mc_party = case_when(mc_party==0~"Republican",
                              mc_party==1~"Democrat",
                              T~NA_character_)) %>%
  # select just nonwhite, state, and congress 
  select(nonwhite_mc, cong, mc_party, state_dist) %>%
  # Filter out NAs 
  filter(!is.na(mc_party) & !is.na(nonwhite_mc)) %>%
  # Get only distinct combos 
  distinct() %>%
  # Grouping by MC party 
  group_by(mc_party, cong) %>%
  # Generate variables for the number of nonwhite MCs and the percentage of nonwhite MCs within region 
  mutate(number_nonwhite = sum(nonwhite_mc==1),
         number_white = sum(nonwhite_mc==0),
         pct_nonwhite = round(100*(sum(nonwhite_mc==1)/(sum(nonwhite_mc==1)+sum(nonwhite_mc==0))), 2)) %>%
  group_by(mc_party, cong, number_nonwhite, number_white, pct_nonwhite) %>%
  summarise() 

colnames(racebyyear_tab) <- c("Party", "Congress", "# POC MCs", "# White MCs", "% POC MCs")

# apply the model to each df and produce stargazer result
kbl(racebyyear_tab, caption="MC Race by Party and Congressional Session, 2008-2020", booktabs = T, "latex")

#
# A6: Regions ----

df_region <- df_even_w %>% 
  mutate(nonwhite_mc = as.numeric(as.character(nonwhite_mc))) %>%
  # Put each state into their census regions with a new variable for region 
  mutate(region = case_when(st_abbr %in% c("CT", "ME", "MA", "NH", "RI", "VT") ~ "New England",
                            st_abbr %in% c("NJ", "NY", "PA") ~ "Middle Atlantic",
                            st_abbr %in% c("IN", "IL", "MI", "OH", "WI") ~ "East North Central",
                            st_abbr %in% c("IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "West North Central",
                            st_abbr %in% c("DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV") ~ "South Atlantic",
                            st_abbr %in% c("AL", "KY", "MS", "TN") ~ "East South Central",
                            st_abbr %in% c("AR", "LA", "OK", "TX") ~ "West South Central",
                            st_abbr %in% c("AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY") ~ "Mountain West",
                            st_abbr %in% c("AK", "CA", "HI", "OR", "WA") ~ "Pacific West"))

# Generate a table with the number of nonwhite MCs for each region, as well as the percentage within region of nonwhite
region_tab <- df_region %>%
  # select just nonwhite, state, and congress 
  select(nonwhite_mc, cong, region, state_dist) %>%
  # Filter out any regions with NA 
  filter(!is.na(region) & !is.na(nonwhite_mc)) %>%
  # Get only distinct combos 
  distinct() %>%
  # Grouping by census region 
  group_by(region) %>%
  # Generate variables for the number of nonwhite MCs and the percentage of nonwhite MCs within region 
  mutate(number_nonwhite = sum(nonwhite_mc==1),
         number_white = sum(nonwhite_mc==0),
         pct_nonwhite = round(100*(sum(nonwhite_mc==1)/(sum(nonwhite_mc==1)+sum(nonwhite_mc==0))), 2)) %>%
  group_by(region, number_nonwhite, number_white, pct_nonwhite) %>%
  summarise()

colnames(region_tab) <- c("Region", "# POC MCs", "# White MCs", "% POC MCs")

kbl(region_tab, caption="MC Race by Region, 2008-2020", "latex", booktabs = T)

#
# Figure A1: RR & FIRE over time ----

# restrict to D and R 
df_even_w.2 <- df_even_w %>%
  filter(party7==1 | party7==3) %>%
  mutate(party7 = as.character(party7))

# Calculate means and standard errors for white respondents -- racial resentment 
average.rr.w <- df_even_w.2 %>%
  select(rr_index, year, party7, weight) %>%
  filter(year != 2008) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = wtd.mean(rr_index, weights = weight, na.rm = TRUE),
    se = sqrt(wtd.var(rr_index, weights = weight, na.rm = TRUE) / sum(!is.na(rr_index))),
    n = sum(!is.na(rr_index)),
    .groups = "drop"
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7 == 1 ~ "White Democrats",
           party7 == 3 ~ "White Republicans"
         ),
         type = "Racial Resentment")

# Calculate means and standard errors for white respondents -- FIRE 1
average.fire1.w <- df_even_w.2 %>%
  select(fire1, year, party7, weight) %>%
  filter(year != 2008) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = wtd.mean(fire1, weights = weight, na.rm = TRUE),
    se = sqrt(wtd.var(fire1, weights = weight, na.rm = TRUE) / sum(!is.na(fire1))),
    n = sum(!is.na(fire1)),
    .groups = "drop"
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7 == 1 ~ "White Democrats",
           party7 == 3 ~ "White Republicans"
         ),
         type = "FIRE 1")

# Calculate means and standard errors for white respondents -- FIRE 1
average.fire2.w <- df_even_w.2 %>%
  select(fire2, year, party7, weight) %>%
  filter(year != 2008) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = wtd.mean(fire2, weights = weight, na.rm = TRUE),
    se = sqrt(wtd.var(fire2, weights = weight, na.rm = TRUE) / sum(!is.na(fire2))),
    n = sum(!is.na(fire2)),
    .groups = "drop"
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7 == 1 ~ "White Democrats",
           party7 == 3 ~ "White Republicans"
         ),
         type = "FIRE 2") 

average.all <- rbind(average.rr.w, average.fire1.w, average.fire2.w)

# Create the plot with error bars and larger points
ggplot(average.all, aes(year, avg., group=party7)) +
  # Add error bars
  geom_errorbar(aes(ymin = avg. - 1.96*se, 
                    ymax = avg. + 1.96*se,
                    color = party7),
                width = 0.4) +
  # Increase point size to 3
  geom_point(aes(color=party7, shape=party7), size = 3) + 
  geom_line(aes(color=party7)) +
  facet_wrap(~type) +
  theme_bw() +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024)) +
  scale_color_grey() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12, angle=0, vjust=.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        axis.text = element_text(size=10)) +
  labs(x = "Year", y = "Average") +
  ylim(0,1)


#
# Figure A2: Demographic shifts in districts ---- 

# GEO_ID, NAME, DP05_0001E (total popultion), DP05_0032E (one race, white), DP05_0033E (one race, black), 
#DP05_0034E (one race, native), DP05_0039E (one race, asian), DP05_0047E (one race, native hawaiian)   

df.mc <- read.csv("./Full_MCdata_110-118.v2.csv")

# load in demographic data from ACS 
df.acs10 <- read.csv("./Census demographics/ACSDP5Y2010.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0032E, DP05_0033E) %>%
  rename(Population = DP05_0001E,
         Pop_white = DP05_0032E,
         Pop_black = DP05_0033E) %>%
  mutate(Year = 2010)
df.acs12 <- read.csv("./Census demographics/ACSDP5Y2012.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0032E, DP05_0033E) %>%
  rename(Population = DP05_0001E,
         Pop_white = DP05_0032E,
         Pop_black = DP05_0033E)%>%
  mutate(Year = 2012)
df.acs14 <- read.csv("./Census demographics/ACSDP5Y2014.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0032E, DP05_0033E) %>%
  rename(Population = DP05_0001E,
         Pop_white = DP05_0032E,
         Pop_black = DP05_0033E)%>%
  mutate(Year = 2014)
df.acs16 <- read.csv("./Census demographics/ACSDP5Y2016.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0032E, DP05_0033E) %>%
  rename(Population = DP05_0001E,
         Pop_white = DP05_0032E,
         Pop_black = DP05_0033E)%>%
  mutate(Year = 2016)
df.acs18 <- read.csv("./Census demographics/ACSDP5Y2018.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0037PE, DP05_0038PE) %>%
  rename(Population = DP05_0001E,
         pct_white = DP05_0037PE,
         pct_black = DP05_0038PE)%>%
  mutate(Year = 2018)
df.acs20 <- read.csv("./Census demographics/ACSDP5Y2020.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0037PE, DP05_0038PE) %>%
  rename(Population = DP05_0001E,
         pct_white = DP05_0037PE,
         pct_black = DP05_0038PE)%>%
  mutate(Year = 2020)
df.acs22 <- read.csv("./Census demographics/ACSDP5Y2022.DP05-Data.csv") %>%
  select(GEO_ID, NAME, DP05_0001E, DP05_0037PE, DP05_0038PE) %>%
  rename(Population = DP05_0001E,
         pct_white = DP05_0037PE,
         pct_black = DP05_0038PE)%>%
  mutate(Year = 2022)

# don't have 2024 acs

# merge first set 
df.acs.demos1 <- rbind(df.acs10, df.acs12, df.acs14, df.acs16) %>% 
  filter(GEO_ID != "Geography") %>%
  mutate(NAME = gsub("\\(at Large\\)", "1", NAME)) %>%
  separate(NAME, into = c("district", "state"), sep=", ") %>%
  filter(state != "Puerto Rico" & state != "District of Columbia") %>%
  filter(!grepl(pattern = "not defined", x=district)) %>%
  unique() %>%
  mutate(district = gsub("Congressional District ", "", district)) %>%
  separate(district, into=c("dist", "cong", sep=" \\(1")) %>%
  select(-" \\(1") %>%
  mutate(cong = gsub("th","",cong)) %>%
  select(-GEO_ID) %>%
  mutate(cong = as.numeric(cong),
         dist = as.numeric(dist),
         Population = as.numeric(as.character(Population)),
         Pop_white = as.numeric(as.character(Pop_white)),
         Pop_black = as.numeric(as.character(Pop_black)),
         pct_white = (Pop_white/Population)*100,
         pct_black = (Pop_black/Population)*100) %>%
  select(-c(Pop_white, Pop_black))


# Merge second set 
df.acs.demos2 <- rbind(df.acs18, df.acs20, df.acs22) %>%
  filter(GEO_ID != "Geography") %>%
  mutate(NAME = gsub("\\(at Large\\)", "1", NAME)) %>%
  separate(NAME, into = c("district", "state"), sep=", ") %>%
  filter(state != "Puerto Rico" & state != "District of Columbia") %>%
  filter(!grepl(pattern = "not defined", x=district)) %>%
  unique() %>%
  mutate(district = gsub("Congressional District ", "", district)) %>%
  separate(district, into=c("dist", "cong", sep=" \\(1")) %>%
  select(-" \\(1") %>%
  mutate(cong = gsub("th","",cong)) %>%
  select(-GEO_ID) %>%
  mutate(cong = as.numeric(cong),
         dist = as.numeric(dist))

# put together 
df.acs.demos <- rbind(df.acs.demos1, df.acs.demos2)

# Merge to MC dataset 
df.mc.demo <- df.mc %>%
  mutate(cong = as.numeric(cong),
         dist = as.numeric(dist),
         Year = mc_year+1) %>%
  left_join(df.acs.demos, by=c("dist", "state","Year")) 

## show demographic changes 
# need dataset of mc race in each year as columns and pct white in each year  
df.mc.demo.wide <- df.mc.demo %>%
  mutate(pct_white = as.numeric(as.character(pct_white)),
         nonwhite_mc = as.numeric(as.character(nonwhite_mc))) %>% 
  group_by(dist, state, Year) %>%
  summarise(nonwhite_mc = mean(nonwhite_mc, na.rm = TRUE),
            pct_white = mean(pct_white, na.rm = TRUE),
            .groups = "drop") %>%
  pivot_wider(
    names_from = Year,
    values_from = c(nonwhite_mc, pct_white),
    names_glue = "{.value}_{Year}"
  ) %>% 
  # generate change variables
  # Create MC race shift variables (0 -> 1)
  mutate(mc_10_12 = ifelse(nonwhite_mc_2010 == 0 & nonwhite_mc_2012 == 1, 1, 0),
         mc_12_14 = ifelse(nonwhite_mc_2012 == 0 & nonwhite_mc_2014 == 1, 1, 0),
         mc_14_16 = ifelse(nonwhite_mc_2014 == 0 & nonwhite_mc_2016 == 1, 1, 0),
         mc_16_18 = ifelse(nonwhite_mc_2016 == 0 & nonwhite_mc_2018 == 1, 1, 0),
         mc_18_20 = ifelse(nonwhite_mc_2018 == 0 & nonwhite_mc_2020 == 1, 1, 0),
         mc_20_22 = ifelse(nonwhite_mc_2020 == 0 & nonwhite_mc_2022 == 1, 1, 0),
         # Percentage point changes in pct_white
         pctw_10_12 = pct_white_2012 - pct_white_2010,
         pctw_12_14 = pct_white_2014 - pct_white_2012,
         pctw_14_16 = pct_white_2016 - pct_white_2014,
         pctw_16_18 = pct_white_2018 - pct_white_2016,
         pctw_18_20 = pct_white_2020 - pct_white_2018,
         pctw_20_22 = pct_white_2022 - pct_white_2020)

# I want to plot the average shifts 
df.transitions <- df.mc.demo.wide %>%
  select(starts_with("mc_"), starts_with("pctw_")) %>%
  pivot_longer(cols = everything(), names_to = c(".value", "period"), names_sep = "_(?=\\d{2}_\\d{2})") %>%
  filter(!is.na(mc)) %>%
  mutate(type = case_when(
    mc == 1 ~ "White to POC",
    mc == 0 ~ "Remain White",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(type))

# Average change by transition type
df.transitions.summary <- df.transitions %>%
  group_by(type) %>%
  summarise(avg_change = mean(pctw, na.rm = TRUE), .groups = "drop") 


# Figure
df.transitions.period <- df.transitions %>%
  group_by(period, type) %>%
  summarise(mean_change = mean(pctw, na.rm = TRUE),
            se = sd(pctw, na.rm = TRUE) / sqrt(n()),
            .groups = "drop") %>%
  mutate(lower = mean_change - se,
         upper = mean_change + se) %>%
  mutate(period = case_when(period == "10_12" ~ "2010-2012",
                            period == "12_14" ~ "2012-2014",
                            period == "14_16" ~ "2014-2016",
                            period == "16_18" ~ "2016-2018",
                            period == "18_20" ~ "2018-2020",
                            period == "20_22" ~ "2020-2022"))

ggplot(df.transitions.period, aes(x = period, y = mean_change, color = type, group = type)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(x = "",
       y = "Avg. Change in %\nWhite Population (pp)",
       color = "MC Transition") +
  theme_bw() +
  theme(plot.title = element_text(face = "bold"),
        legend.position = "bottom",
        legend.title = element_blank(),
        axis.title.y=element_text(angle=0, vjust=.5)) +
  scale_color_grey()

#
# Figure A3: Treated districts ---- 
# subset/collapse data to district level, keeping MC race in each year
temp <- df_even_w %>%
  #filter(mc_party==1) %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  group_by(state_dist, mc_party) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated<- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
dists <- treated$state_dist
df_even_w_treated <- df_even_w %>% 
  filter(state_dist %in% dists) %>%
  mutate(region = case_when(st_abbr %in% c("CT", "ME", "MA", "NH", "RI", "VT") ~ "New England",
                            st_abbr %in% c("NJ", "NY", "PA") ~ "Middle Atlantic",
                            st_abbr %in% c("IN", "IL", "MI", "OH", "WI") ~ "East North Central",
                            st_abbr %in% c("IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "West North Central",
                            st_abbr %in% c("DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV") ~ "South Atlantic",
                            st_abbr %in% c("AL", "KY", "MS", "TN") ~ "East South Central",
                            st_abbr %in% c("AR", "LA", "OK", "TX") ~ "West South Central",
                            st_abbr %in% c("AZ", "CO", "ID", "NM", "MT", "UT", "NV", "WY") ~ "Mountain West",
                            st_abbr %in% c("AK", "CA", "HI", "OR", "WA") ~ "Pacific West"),
         state_dist = paste(st_abbr, as.character(dist), sep=" ")) %>%
  group_by(year, state_dist) %>%
  mutate(nonwhite_mc = as.numeric(nonwhite_mc),
         black_mc_nw = as.numeric(black_mc_nw),
         asian_mc_nw = as.numeric(asian_mc_nw),
         hispanic_mc_nw = as.numeric(hispanic_mc_nw),
         state_dist = as.character(state_dist)) %>%
  summarise_at(vars(nonwhite_mc, black_mc_nw, asian_mc_nw, hispanic_mc_nw, approval_rep), mean, na.rm=T) %>%
  filter(state_dist!="NA 1"&state_dist!="HI 1"&state_dist!="NV 4"&state_dist!="NY 16"& state_dist!="PA 2"&state_dist!="UT 4")

# create variable with indicator of MC race
df_even_w_treated <- df_even_w_treated %>%
  mutate(mc_race = case_when(nonwhite_mc==0~0,
                             black_mc_nw==1~1,
                             asian_mc_nw==1~3,
                             hispanic_mc_nw==1~2,
                             T~4))

# color setup
mycol<-brewer.pal(5,"Greys")[c(1,2,3,4,5)]

# plot 
panelview(approval_rep ~ mc_race, data=df_even_w_treated, 
          index=c("state_dist", "year"), xlab="Year", 
          ylab = "District", 
          legend.labs = c("White", "Black", "Hispanic", "Asian American", "Other POC"),
          cex.main = 0, background = "white", color = mycol, 
          cex.legend = 6, cex.lab = 6, cex.axis.x = 6, cex.axis.y = 6)

#caption: We present the districts that were treated during at least one session from 2008 to 2020. We show whether a district was represented by a white or POC member of Congress.
## Appendix B: Don't Know Robustness Check ----
# Table B1: DK Basic - Missing data effects ----
missing_dat <- df_even_w %>%
  mutate(approval_dk = case_when(is.na(approval_rep_na)~1,
                                 T~0),
         ideo_dk = case_when(is.na(ideo_mc_na)~1,
                             T~0))
missing_dat_dem <- missing_dat %>%
  filter(party7==1)
missing_dat_rep <- missing_dat %>%
  filter(party7==3)

mod_approval_dk <- felm(approval_dk ~ nonwhite_mc + mc_seniority + mc_gender | cong + dist_party_mc | 0 |
                          state_dist, data =missing_dat, weights = missing_dat$weight)
mod_approval_dk_dem <- felm(approval_dk ~ nonwhite_mc + mc_seniority + mc_gender | cong + dist_party_mc | 0 |
                              state_dist, data =missing_dat_dem, weights = missing_dat_dem$weight)
mod_approval_dk_rep <- felm(approval_dk ~ nonwhite_mc + mc_seniority + mc_gender | cong + dist_party_mc | 0 |
                              state_dist, data =missing_dat_rep, weights = missing_dat_rep$weight)

stargazer(mod_approval_dk, mod_approval_dk_dem, mod_approval_dk_rep, type="latex",
          column.labels=c("Full sample", "Democrats", "Republicans"), 
          model.numbers=F, style="apsr",
          header=F, title = "Effects of POC MC on Missing/DK Approval Rating",
          #omit = c("mc_seniority", "mc_gender"),
          covariate.labels=c("POC MC", "MC Seniority", "MC Gender"),
          dep.var.labels=c("Missing or DK MC approval"), font.size="small", omit.stat = c("f", "ser"),
          column.sep.width = "-3pt", 
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes.align = "l",
          add.lines = list(c("District * Party FEs", "Y", "Y", "Y"),
                           c("Congress FEs", "Y", "Y")))

#
# Table B2: Mean,Median,NA ----

# regressions for each version of the approval variable 
mod_d <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)
mod_d.na <- felm(approval_rep_na ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)
mod_d.mean <- felm(approval_rep_mean ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)


temp <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep_na)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod2_n <- nrow(treated)

temp <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep_mean)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod3_n <- nrow(treated)

stargazer(mod_d, mod_d.na, mod_d.mean,
          type='latex', header=FALSE, style="apsr",
          dep.var.labels=c("\\shortstack{Median \\\\ Democrats}", "\\shortstack{NA \\\\ Democrats}", "\\shortstack{Mean \\\\ Democrats}"), 
          font.size="small", omit.stat = c("f", "ser"),
          column.sep.width = "-3pt", 
          model.names	= FALSE, colnames = FALSE, model.numbers = FALSE,
          order = c("nonwhite_mc", "rr_index_flip:nonwhite_mc", "rr_index_flip", "mc_seniority", "mc_gender"),
          covariate.labels=c("POC MC", "POC MC x Racial Resentment", "Racial Resentment", "MC Seniority", "MC Gender"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y", "Y"),
                           c("Congress FEs", "Y", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n, mod2_n, mod3_n)), 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001),
          #label = "tab:approval-alt",
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01"), notes.append = F)

#
## Appendix C: Leaners ----

# No Leaners D
df_nolean.d <- df_even2 %>%
  filter(party3 == 1 & nonwhite==0) %>%
  mutate(rr_index_flip = 1-rr_index)

# Leaners D
df_lean.d <- df_even2 %>%
  filter(party7 == 1 & nonwhite==0) %>%
  mutate(rr_index_flip = 1-rr_index)

# Includes leaners D
mod1.lean_d <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                cong + dist_party_mc | 0 | state_dist, data=df_lean.d, weights=df_lean.d$weight)

# Excludes leaners D
mod1.nolean_d <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                cong + dist_party_mc | 0 | state_dist, data=df_nolean.d, weights=df_nolean.d$weight)


# calculate number of treated districts in each dataset 
temp <- df_nolean.d %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp <- df_lean.d %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod2_n <- nrow(treated)


stargazer(mod1.lean_d, mod1.nolean_d,
          type="latex", 
          column.labels=c("\\shortstack{Leaners}","\\shortstack{No Leaners}"), 
          model.numbers=F, style="apsr",
          header=F, 
          order = c("nonwhite_mc", "rr_index_flip:nonwhite_mc", "rr_index_flip", "mc_seniority", "mc_gender"),
          covariate.labels=c("POC MC", "POC MC x Racial Resentment", "Racial Resentment", "MC Seniority", "MC Gender"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y"),
                           c("Congress FEs", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n, mod2_n)), 
          dep.var.labels=c("MC approval"), font.size="small", omit.stat = c("f", "ser", "rsq"),
          column.sep.width = "-5pt", 
          star.char = c("+", "*", "**"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          title = "Effects of POC MC on MC Approval among White Democrats With and Without Party Leaners",
          notes.align = "l",
          label = "tab:dems",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01"), notes.append = F)

#
## Appendix D: Yearly Table ----

# Models by year 

# Democratic respondent model by year 
mod1.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)


# Republican respondent model by year 
mod2.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc:factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_rep, weights = df_even_w_rep$weight)

temp <- df_even_w_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp <- df_even_w_rep %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod2_n <- nrow(treated)

stargazer(mod1.yearly, mod2.yearly, type="latex", 
          column.labels=c("Democratic respondents", "Republican respondents"), 
          model.numbers=F, style="apsr",
          dep.var.labels=c(""),
          header=F, title = "Effects of POC MC on MC Approval by Year",
          covariate.labels=c("White MC | 2010", "White MC | 2012", "White MC | 2014", "White MC | 2016",
                             "White MC | 2018", "White MC | 2020", "White MC | 2022","White MC | 2024",
                             "MC Seniority", "MC Gender", 
                             "POC MC | 2008",
                             "POC MC | 2010", "POC MC | 2012", "POC MC | 2014",
                             "POC MC | 2016", "POC MC | 2018", "POC MC | 2020", 
                             "POC MC | 2022", "POC MC | 2024"),
          add.lines = list(c("District * Party FEs", "Y", "Y" ),
                           c("No. districts w MC race change", mod1_n, mod2_n)),  font.size="small", omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)
                    #"We present fixed effects models with standard errors clustered at the congressional district level for weighted",
                    #" CCES data from even years between 2008 to 2020. We include `District x MC party' fixed effects and controls for",
                    #" MC seniority, MC gender (gender coded as 0 for men and 1 for women), and racial resentment (with 0 indicating",
                    #" low and 1 indicating high resentment). We do not show controls for gender and seniority in the table. We also",
                    #" interact MC race with survey year. The dependent variable is MC approval, re-scaled from 0 to 1. MCs are coded",
                    #" as either white (0) or POC (1)."), 
          


## Appendix E: Yearly Table by Race ----

mod1.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

mod2.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + black_mc_nw: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

mod3.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + hispanic_mc_nw: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

mod4.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + asian_mc_nw: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

temp <- df_even_w_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp <- df_even_w_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod2_n <- nrow(treated)

temp <- df_even_w_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(hispanic_mc_nw)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(hispanic_mc_nw)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod3_n <- nrow(treated)

temp <- df_even_w_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(asian_mc_nw)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(asian_mc_nw)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod4_n <- nrow(treated)

stargazer(mod2.yearly, mod3.yearly, mod4.yearly, 
          type="latex", 
          column.labels=c("Black MC", "Hispanic MC", 
                          "Asian MC"), 
          model.numbers=F, style="apsr",
          dep.var.labels=c(""),
          keep = c("2008:black_mc_nw", "2010:black_mc_nw", "2012:black_mc_nw", "2014:black_mc_nw", "2016:black_mc_nw", 
                   "2018:black_mc_nw", "2020:black_mc_nw","2022:black_mc_nw","2024:black_mc_nw",
                   "2008:hispanic_mc_nw", "2010:hispanic_mc_nw", "2012:hispanic_mc_nw", "2014:hispanic_mc_nw", "2016:hispanic_mc_nw", 
                   "2018:hispanic_mc_nw", "2020:hispanic_mc_nw","2022:hispanic_mc_nw","2024:hispanic_mc_nw",
                   "2008:asian_mc_nw", "2010:asian_mc_nw", "2012:asian_mc_nw", "2014:asian_mc_nw", "2016:asian_mc_nw", 
                   "2018:asian_mc_nw", "2020:asian_mc_nw", "2022:asian_mc_nw", "2024:asian_mc_nw"),
          header=F, title = "Effects of MC Race on MC Approval by Year and Racial Group (Democratic Respondents)",
          covariate.labels=c("Black MC | 2008", "Black MC | 2010", "Black MC | 2012", "Black MC | 2014",
                             "Black MC | 2016", "Black MC | 2018", "Black MC | 2020", "Black MC | 2022","Black MC | 2024",
                             "Hispanic MC | 2008", "Hispanic MC | 2010", "Hispanic MC | 2012", "Hispanic MC | 2014",
                             "Hispanic MC | 2016", "Hispanic MC | 2018", "Hispanic MC | 2020","Hispanic MC | 2022",
                             "Hispanic MC | 2024",
                             "Asian MC | 2008", "Asian MC | 2010", "Asian MC | 2012", "Asian MC | 2014",
                             "Asian MC | 2016", "Asian MC | 2018", "Asian MC | 2020", "Asian MC | 2022",
                             "Asian MC | 2024"),
          add.lines = list(c("District * Party FEs", "Y", "Y", "Y"),
                           c("No. districts w MC race change", mod2_n, mod3_n, mod4_n)),  
          font.size="small", omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)

#
# Figure E1: Yearly Figure by race ----

# Democratic respondent model by year 
mod1.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

mod2.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + black_mc_nw: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

mod3.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + hispanic_mc_nw: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)

mod4.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + asian_mc_nw: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)


# create data frames with regression output of interest
mod1.yearly_df <- broom::tidy(mod1.yearly) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                "factor(year)2010:nonwhite_mc",
                                                                "factor(year)2012:nonwhite_mc",
                                                                "factor(year)2014:nonwhite_mc",
                                                                "factor(year)2016:nonwhite_mc",
                                                                "factor(year)2018:nonwhite_mc",
                                                                "factor(year)2020:nonwhite_mc",
                                                                "factor(year)2022:nonwhite_mc",
                                                                "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "All POC MCs",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

mod2.yearly_df <- broom::tidy(mod2.yearly) %>% filter(term%in%c("factor(year)2008:black_mc_nw",
                                                                "factor(year)2010:black_mc_nw",
                                                                "factor(year)2012:black_mc_nw",
                                                                "factor(year)2014:black_mc_nw",
                                                                "factor(year)2016:black_mc_nw",
                                                                "factor(year)2018:black_mc_nw",
                                                                "factor(year)2020:black_mc_nw",
                                                                "factor(year)2022:black_mc_nw",
                                                                "factor(year)2024:black_mc_nw")) %>% 
  mutate(model = "Black MCs",
         term = case_when(term=="factor(year)2008:black_mc_nw"~"2008",
                          term=="factor(year)2010:black_mc_nw"~"2010",
                          term=="factor(year)2012:black_mc_nw"~"2012",
                          term=="factor(year)2014:black_mc_nw"~"2014",
                          term=="factor(year)2016:black_mc_nw"~"2016",
                          term=="factor(year)2018:black_mc_nw"~"2018",
                          term=="factor(year)2020:black_mc_nw"~"2020",
                          term=="factor(year)2022:black_mc_nw"~"2022",
                          term=="factor(year)2024:black_mc_nw"~"2024"))

mod3.yearly_df <- broom::tidy(mod3.yearly) %>% filter(term%in%c("factor(year)2008:hispanic_mc_nw",
                                                                "factor(year)2010:hispanic_mc_nw",
                                                                "factor(year)2012:hispanic_mc_nw",
                                                                "factor(year)2014:hispanic_mc_nw",
                                                                "factor(year)2016:hispanic_mc_nw",
                                                                "factor(year)2018:hispanic_mc_nw",
                                                                "factor(year)2020:hispanic_mc_nw",
                                                                "factor(year)2022:hispanic_mc_nw",
                                                                "factor(year)2024:hispanic_mc_nw")) %>% 
  mutate(model = "Hispanic MCs",
         term = case_when(term=="factor(year)2008:hispanic_mc_nw"~"2008",
                          term=="factor(year)2010:hispanic_mc_nw"~"2010",
                          term=="factor(year)2012:hispanic_mc_nw"~"2012",
                          term=="factor(year)2014:hispanic_mc_nw"~"2014",
                          term=="factor(year)2016:hispanic_mc_nw"~"2016",
                          term=="factor(year)2018:hispanic_mc_nw"~"2018",
                          term=="factor(year)2020:hispanic_mc_nw"~"2020",
                          term=="factor(year)2022:hispanic_mc_nw"~"2022",
                          term=="factor(year)2024:hispanic_mc_nw"~"2024"))

mod4.yearly_df <- broom::tidy(mod4.yearly) %>% filter(term%in%c("factor(year)2008:asian_mc_nw",
                                                                "factor(year)2010:asian_mc_nw",
                                                                "factor(year)2012:asian_mc_nw",
                                                                "factor(year)2014:asian_mc_nw",
                                                                "factor(year)2016:asian_mc_nw",
                                                                "factor(year)2018:asian_mc_nw",
                                                                "factor(year)2020:asian_mc_nw",
                                                                "factor(year)2022:asian_mc_nw",
                                                                "factor(year)2024:asian_mc_nw")) %>% 
  mutate(model = "Asian American MCs",
         term = case_when(term=="factor(year)2008:asian_mc_nw"~"2008",
                          term=="factor(year)2010:asian_mc_nw"~"2010",
                          term=="factor(year)2012:asian_mc_nw"~"2012",
                          term=="factor(year)2014:asian_mc_nw"~"2014",
                          term=="factor(year)2016:asian_mc_nw"~"2016",
                          term=="factor(year)2018:asian_mc_nw"~"2018",
                          term=="factor(year)2020:asian_mc_nw"~"2020",
                          term=="factor(year)2022:asian_mc_nw"~"2022",
                          term=="factor(year)2024:asian_mc_nw"~"2024"))

# create tibble with all regression output
models <- rbind(mod1.yearly_df, mod2.yearly_df, mod3.yearly_df, mod4.yearly_df)

mycol<-brewer.pal(5,"Greys")[c(2,5)]

# plot coefficients
yearbyyear <- dwplot(models,
                     vline = geom_vline(
                       xintercept = 0,
                       colour = "grey60",
                       linetype = 2
                     ),
                     dot_args = list(aes(shape = model), size=3),
                     whisker_args = list(aes(color = model))) +
  theme_bw() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on MC Approval by MC Racial Group & Year (White Democratic Respondents)") +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10))+
  scale_colour_manual(values = c("grey","grey","grey", "black")) +
  scale_shape_manual(values = c("circle","cross","square", "triangle")) +
  scale_y_discrete(limits=rev) +
  xlab("Marginal Effect of \nPOC MC on Approval") +
  ylab("Year") +
  coord_flip() +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))

yearbyyear


# figure E2: Redistricting analysis ---- 

# load daily kos 117 and 118 data 
df.kos117 <- read.csv("./DailyKos117th.csv") %>%
  select(Code, Name_first, Name_last) %>%
  separate(Code, into=c("state", "dist"), sep="-", remove=TRUE)
df.kos118 <- read.csv("./DailyKos118th.csv")%>%
  select(Code, Name_first, Name_last)%>%
  separate(Code, into=c("state", "dist"), sep="-", remove=TRUE)

# check if 117th still around and in same district 
joined1718 <- df.kos117 %>%
  left_join(df.kos118, by = c("state", "Name_last", "Name_first"))

# NAs left, so remove those 
# district switch but same name means district switches 
# just want to Keep only MCs who switched districts 
redistricted_mcs <- joined1718 %>%
  filter(!is.na(dist.y) & Name_first != "") %>% # still in congress
  filter(dist.x != dist.y) # changed districts 

# These are the state and dist.x combos to remove from the main dataset
districts_to_remove <- redistricted_mcs %>%
  select(state, dist = dist.x) %>%
  mutate(dist = as.integer(as.character(dist))) %>%
  rename(st_abbr = state)

# Now remove them from df_even_w_dem 
df_even_w_dem.noredistrict <- df_even_w_dem %>%
  anti_join(districts_to_remove, by = c("st_abbr", "dist"))

# Models by year 
# Democratic respondent model by year 
mod1.yearly <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                      dist_party_mc | 0 | state_dist, data = df_even_w_dem, weights = df_even_w_dem$weight)
mod1.yearly.nodist <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                             dist_party_mc | 0 | state_dist, data = df_even_w_dem.noredistrict, weights = df_even_w_dem.noredistrict$weight)

# create data frames with regression output of interest
mod1.yearly_df <- broom::tidy(mod1.yearly) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                "factor(year)2010:nonwhite_mc",
                                                                "factor(year)2012:nonwhite_mc",
                                                                "factor(year)2014:nonwhite_mc",
                                                                "factor(year)2016:nonwhite_mc",
                                                                "factor(year)2018:nonwhite_mc",
                                                                "factor(year)2020:nonwhite_mc",
                                                                "factor(year)2022:nonwhite_mc",
                                                                "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "All Districts",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

mod1.yearly_df.nodist <- broom::tidy(mod1.yearly.nodist) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                              "factor(year)2010:nonwhite_mc",
                                                                              "factor(year)2012:nonwhite_mc",
                                                                              "factor(year)2014:nonwhite_mc",
                                                                              "factor(year)2016:nonwhite_mc",
                                                                              "factor(year)2018:nonwhite_mc",
                                                                              "factor(year)2020:nonwhite_mc",
                                                                              "factor(year)2022:nonwhite_mc",
                                                                              "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Excluding districts\nwhere former MC remained\nin Congress but changed\ndistrict",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

# these ones keep both more than 90% of boundary and number
districts_to_keep <- tribble(
  ~st_abbr, ~dist,
  "AK", "1",
  "AL", "5",
  "AR", "1", "AR", "2", "AR", "3",
  "CA", "2", "CA", "17", "CA", "49",
  "CO", "1", "CO", "3", "CO", "5",
  "CT", "1", "CT", "2", "CT", "3", "CT", "4", "CT", "5",
  "DE", "1",
  "FL", "1", "FL", "8", "FL", "19",
  "GA", "1", "GA", "2", "GA", "3", "GA", "8", "GA", "10", "GA", "12",
  "HI", "1", "HI", "2",
  "ID", "1", "ID", "2",
  "IN", "1", "IN", "2", "IN", "3", "IN", "8",
  "KS", "3", "KS", "4",
  "KY", "3", "KY", "4", "KY", "5", "KY", "6",
  "MA", "1", "MA", "3", "MA", "5", "MA", "6", "MA", "7", "MA", "8", "MA", "9",
  "ME", "1", "ME", "2",
  "MN", "1", "MN", "2", "MN", "3", "MN", "4", "MN", "5", "MN", "6",
  "MO", "7",
  "MS", "1", "MS", "2", "MS", "3", "MS", "4",
  "NC", "11",
  "ND", "1",
  "NE", "2", "NE", "3",
  "NH", "1", "NH", "2",
  "NJ", "1", "NJ", "8",
  "NV", "2",
  "NY", "1", "NY", "2", "NY", "3", "NY", "4", "NY", "5", "NY", "6", "NY", "7", "NY", "8", "NY", "9",
  "NY", "10", "NY", "11", "NY", "12", "NY", "13", "NY", "15", "NY", "16", "NY", "17", "NY", "18", 
  "NY", "20", "NY", "23", "NY", "25", "NY", "26",
  "OK", "1", "OK", "2", "OK", "4",
  "OR", "2", "OR", "3", "OR", "4",
  "PA", "1", "PA", "2", "PA", "3", "PA", "6", "PA", "7", "PA", "8", "PA", "10", "PA", "11", "PA", "17",
  "RI", "1", "RI", "2",
  "SC", "1", "SC", "2", "SC", "3", "SC", "4", "SC", "5", "SC", "7",
  "SD", "1",
  "TN", "1", "TN", "2",
  "TX", "12", "TX", "16", "TX", "19",
  "VA", "8",
  "VT", "1",
  "WA", "3", "WA", "4", "WA", "5", "WA", "6", "WA", "7", "WA", "10",
  "WI", "2", "WI", "3", "WI", "6", "WI", "7", "WI", "8",
  "WY", "1"
) %>% 
  mutate(dist = as.integer(dist))


df_filtered <- df_even_w_dem %>%
  inner_join(districts_to_keep, by = c("st_abbr", "dist"))

df_filtered <- df_even_w_dem %>%
  # keep all districts for years other than 2024
  filter(year != 2024 | (year == 2024 & paste0(st_abbr, dist) %in% 
                           paste0(districts_to_keep$st_abbr, districts_to_keep$dist)))



mod1.yearly.nodist2 <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year) | 
                              dist_party_mc | 0 | state_dist, data = df_filtered, weights = df_filtered$weight)

mod1.yearly_df.nodist2 <- broom::tidy(mod1.yearly.nodist2) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                                "factor(year)2010:nonwhite_mc",
                                                                                "factor(year)2012:nonwhite_mc",
                                                                                "factor(year)2014:nonwhite_mc",
                                                                                "factor(year)2016:nonwhite_mc",
                                                                                "factor(year)2018:nonwhite_mc",
                                                                                "factor(year)2020:nonwhite_mc",
                                                                                "factor(year)2022:nonwhite_mc",
                                                                                "factor(year)2024:nonwhite_mc")) %>% 
  mutate(model = "Excluding districts\npost-2022 with new\nboundaries and numbers",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022",
                          term=="factor(year)2024:nonwhite_mc"~"2024"))

models <- rbind(mod1.yearly_df, mod1.yearly_df.nodist, mod1.yearly_df.nodist2)


dwplot(models,
       vline = geom_vline(
         xintercept = 0,
         colour = "grey60",
         linetype = 2
       ),
       dot_args = list(aes(shape = model), size=3),
       whisker_args = list(aes(color = model))) +
  theme_bw() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on MC Approval by Constituent Party and Year") +
  xlab("Marginal Effect of \n POC MC on Approval") +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, angle=0, vjust = 0.5),
        axis.title.y = element_text(size = 12, angle=0, vjust = 0.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10),
        axis.text = element_text(size=10))+
  scale_colour_manual(values = c("darkgrey", "black", "lightgrey")) +
  scale_y_discrete(limits=rev) +
  ylab("Year") +
  coord_flip() +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))

#
## Appendix F: Knowledge of MC Race ----
# Table F1: Yearly effects by accurately perceiving race ---- 

df_high.knowledge_dem <- df_even_w_dem %>%
  filter(race_correct_nw==1)

df_low.knowledge_dem <- df_even_w_dem %>%
  filter(race_correct_nw==0)

## Basic approval for white dems over time 
# High Knowledge respondents 
mod1.hk <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                | dist_party_mc | 0 | state_dist, data=df_high.knowledge_dem,weights=df_high.knowledge_dem$weight)
# Low Knowledge respondents 
mod1.lk <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                | dist_party_mc | 0 | state_dist, data=df_low.knowledge_dem,weights=df_low.knowledge_dem$weight)

temp <- df_high.knowledge_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp <- df_low.knowledge_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(mc_gender)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod2_n <- nrow(treated)

stargazer(mod1.lk, mod1.hk, type="latex", 
          column.labels=c("Low Knowledge", "High Knowledge"), 
          model.numbers=F, style="apsr",
          dep.var.labels=c(""),
          header=F, title = "Effects of POC MC on MC Approval by Year",
          covariate.labels=c("White MC | 2010", "White MC | 2012", "White MC | 2014", "White MC | 2016",
                             "White MC | 2018", "White MC | 2020","White MC | 2022","White MC | 2024", "MC Seniority", "MC Gender", 
                             "POC MC | 2008",
                             "POC MC | 2010", "POC MC | 2012", "POC MC | 2014",
                             "POC MC | 2016", "POC MC | 2018", "POC MC | 2020",
                             "POC MC | 2022", "POC MC | 2024"),
          add.lines = list(c("District * Party FEs", "Y", "Y" ),
                           c("No. districts w MC race change", mod1_n, mod2_n)),  font.size="small", omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)


# Table F2: RR x POC MC by knowledge of MC Race ----

df_high.knowledge_dem <- df_even_w_dem %>%
  filter(race_correct_nw==1) %>%
  mutate(rr_index_flip = 1-rr_index)

df_low.knowledge_dem <- df_even_w_dem %>%
  filter(race_correct_nw==0) %>%
  mutate(rr_index_flip = 1-rr_index)


# interact POC MC*rr with district-level controls for White Dems for low and high knowledge

#high 
mod_rr1_highknow <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                       cong + dist_party_mc | 0 | state_dist, data=df_high.knowledge_dem, 
                     weights=df_high.knowledge_dem$weight)

# low 
mod_rr1_lowknow <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                          cong + dist_party_mc | 0 | state_dist, data=df_low.knowledge_dem, 
                        weights=df_low.knowledge_dem$weight)

# calculate number of treated districts in each dataset
temp <- df_high.knowledge_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp3 <- df_low.knowledge_dem %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated3 <- temp3 %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod3_n <- nrow(treated3)

# table
stargazer(mod_rr1_highknow, mod_rr1_lowknow, type="latex", 
          column.labels=c("Accurately Identify MC Race", "Inaccurately Identify MC Race"), 
          model.numbers=F, style="apsr",
          header=F, title = "Effect of POC MC on MC Approval, 
          Interacting MC Race with Racial Resentment for High and Low Knowledge Respondents (White Respondents)",
          omit = c("mc_seniority", "mc_gender"),
          order = c("nonwhite_mc", "rr_index:nonwhite_mc", "rr_index"),
          covariate.labels=c("POC MC", "POC MC x Racial Resentment", "Racial Resentment"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y"),
                           c("Congressional session FEs", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n, mod3_n)), 
          dep.var.labels=c("MC approval"), font.size="small", omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          #label = "tab:dems-rr",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)

#
# Table F3 & F4: Table for correctly perceiving race ----



table(df_even_w_dem$perceived_race, useNA = "always") 

# (35503/nrow(df_even_w_dem))*100
  # only 23.18% didn't respond to this 
  # so we have about 3/4 who did 

df_even_w_dem <- df_even_w_dem %>%
  # assigning race numbers like perception question
  mutate(mc_race.num = case_when(nonwhite_mc == 0 ~ 1,
                                 black_mc == 1 ~ 2,
                                 hispanic_mc == 1 ~ 3,
                                 asian_mc == 1 ~ 4,
                                 nonwhite_mc == 1 & black_mc == 0 & hispanic_mc == 0 & asian_mc == 0 ~ 5,
                                 TRUE ~ NA_real_),
         # generate variable for when the race matches the perception
         perceived_race_cong = case_when(mc_race.num == perceived_race ~ 1,
                                        !is.na(perceived_race) & mc_race.num != perceived_race ~ 0,
                                        is.na(perceived_race) ~ NA_real_))

# Table to get percentage who correctly identify out of those who respond to question
tab.race2 <- df_even_w_dem %>%
  mutate(mc_race = case_when(mc_race.num == 1 ~ "White",
                             mc_race.num == 2 ~ "Black",
                             mc_race.num == 3 ~ "Hispanic",
                             mc_race.num == 4 ~ "Asian",
                             mc_race.num == 5 ~ "Nonwhite")) %>%
  filter(!is.na(perceived_race_cong) & mc_race!="Nonwhite") %>%
  group_by(mc_race) %>%
  summarise(mean_prc = round((sum(perceived_race_cong, na.rm=T) / n())*100,2)) %>%
  ungroup() 

tab.race <- df_even_w_dem %>%
  filter(!is.na(perceived_race_cong) & mc_race!="Nonwhite") %>%
  group_by(nonwhite_mc) %>%
  summarise(mean_prc = round((sum(perceived_race_cong, na.rm=T) / n())*100,2)) %>%
  ungroup() 

tab.race$nonwhite_mc <- ifelse(tab.race$nonwhite_mc==1, "POC", "White")

colnames(tab.race) <- c("MC Race", "Pct. respondents who identify correctly")


tab.race.print <- kbl(tab.race, format="latex", booktabs = T, caption = "Percentage of respondents who correctly identify their MC's Race (split by MC race)") %>%
  kable_classic(full_width = F, html_font = "Cambria") %>%
  kable_styling(latex_options = c("hold_position")) 

tab.race.print

# by racial group 
colnames(tab.race2) <- c("MC Race", "Pct. respondents who identify correctly")


tab.race.print2 <- kbl(tab.race2, format="latex", booktabs = T, caption = "Percentage of respondents who correctly identify their MC's Race (split by MC race)") %>%
  kable_classic(full_width = F, html_font = "Cambria") %>%
  kable_styling(latex_options = c("hold_position")) 

tab.race.print2

#
## Appendix G: Ideology & RR over time ----
# Fig G1: RR ----
# restrict to D and R 
df_even_w.2 <- df_even_w %>%
  filter(party7==1 | party7==3) %>%
  mutate(party7 = as.character(party7))

df_even_b.2 <- df_even2 %>%
  filter(party7 == 1 | party7==3) %>%
  filter(black_mc_nw==1) %>%
  mutate(party7 = as.character(party7))

# Calculate means and standard errors for white respondents
average.rr.w <- df_even_w.2 %>%
  select(rr_index, year, party7) %>%
  filter(year != 2008) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = mean(rr_index, na.rm = T),
    se = sd(rr_index, na.rm = T) / sqrt(n()),
    n = n()
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7==1 ~ "White Democrats",
           party7==3 ~ "White Republicans"
         ))

# Calculate means and standard errors for black respondents
average.rr.b <- df_even_b.2 %>%
  select(rr_index, year, party7) %>%
  filter(year != 2008) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = mean(rr_index, na.rm = T),
    se = sd(rr_index, na.rm = T) / sqrt(n()),
    n = n()
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(
           party7==1 ~ "Black Democrats",
           party7==3 ~ "Black Republicans"
         ))

# Combine the datasets
avg.combined <- rbind(average.rr.b, average.rr.w)

# Create the plot with error bars and larger points
ggplot(avg.combined, aes(year, avg., group=party7)) +
  # Add error bars
  geom_errorbar(aes(ymin = avg. - 1.96*se, 
                    ymax = avg. + 1.96*se,
                    color = party7),
                width = 0.2) +
  # Increase point size to 3
  geom_point(aes(color=party7, shape=party7), size = 3) + 
  geom_line(aes(color=party7)) +
  theme_bw() +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024)) +
  scale_color_grey() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12, angle=0, vjust=.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10)) +
  labs(x = "Year", y = "Average Racial \nResentment") +
  ylim(0,1)

#  
# Fig G2: Ideology ----
df_even_w.2 <- df_even_w %>%
  filter(party7==1 | party7==3) %>%
  mutate(party7 = as.character(party7))

df_even_nw.2 <- df_even2 %>%
  filter(party7 == 1 | party7==3) %>%
  filter(nonwhite_mc==1) %>%
  mutate(party7 = as.character(party7))

# generate average ideo in each year for whites
average.ideo.w <- df_even_w.2 %>%
  select(ideo_self, year, party7) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = mean(ideo_self, na.rm = T),
    se = sd(ideo_self, na.rm = T) / sqrt(n()),
    n = n()
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(party7==1 ~ "White Democrats",
                            party7==3 ~ "White Republicans"))

average.ideo.nw <- df_even_nw.2 %>%
  select(ideo_self, year, party7) %>%
  group_by(year, party7) %>%
  summarise(
    avg. = mean(ideo_self, na.rm = T),
    se = sd(ideo_self, na.rm = T) / sqrt(n()),
    n = n()
  ) %>%
  mutate(party7 = as.character(party7),
         party7 = case_when(party7==1 ~ "POC Democrats",
                            party7==3 ~ "POC Republicans"))

combo.ideo <- rbind(average.ideo.w, average.ideo.nw)

ggplot(combo.ideo, aes(year, avg., group=party7)) +
  # Add error bars
  geom_errorbar(aes(ymin = avg. - 1.96*se, 
                    ymax = avg. + 1.96*se,
                    color = party7),
                width = 0.2) +
  # Increase point size to 3
  geom_point(aes(color=party7, shape=party7), size = 3) + 
  geom_line(aes(color=party7)) +
  theme_bw() +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024)) +
  scale_color_grey() +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12, angle=0, vjust=.5),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10)) +
  labs(x = "Year", y = "Average \nSelf-Placed\n Ideology") +
  ylim(0,1) 

#

## Appendix H: ----
# Table H1: POC x RR - all years - Table 1 + district-level controls ----

# Start with Racial Resentment 
# flip index 
df_even_w_dem.index <- df_even_w_dem %>%
  mutate(rr_index_flip = 1-rr_index)

# interact POC MC*rr with district-level controls for White Dems 
mod_rr1_flip.c <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                         cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)

mod_rr3_flip.c <- felm(approval_rep ~ rr_index_flip + rr_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                         cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)

# calculate number of treated districts in each dataset
temp <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp3 <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated3 <- temp3 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod3_n <- nrow(treated3)


# Now the FIRE scales 
# flip index 
df_even_w_dem.fire <- df_even_w_dem %>%
  # making it so higher values are more liberal (initial coding in cleaning was high values as conservative)
  mutate(fire1_index_flip = 1-fire1,
         fire2_index_flip = 1-fire2) 

# interact POC MC*rr with district-level controls for White Dems 
mod1_fire1_flip.c <- felm(approval_rep ~ fire1_index_flip + fire1_index_flip*nonwhite_mc + mc_seniority + mc_gender  | 
                            cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod2_fire1_flip.c <- felm(approval_rep ~ fire1_index_flip + fire1_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                            cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod3_fire2_flip.c <- felm(approval_rep ~ fire2_index_flip + fire2_index_flip*nonwhite_mc + mc_seniority + mc_gender| 
                            cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod4_fire2_flip.c <- felm(approval_rep ~ fire2_index_flip + fire2_index_flip*black_mc_nw + mc_seniority + mc_gender  | 
                            cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

# calculate number of treated districts in each dataset
temp4 <- df_even_w_dem.fire %>%
  filter(!is.na(fire1)&!is.na(fire2)&!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated4 <- temp4 %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod4_n <- nrow(treated4)

temp5 <- df_even_w_dem.fire %>%
  filter(!is.na(fire1)&!is.na(fire2)&!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated5 <- temp5 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod5_n <- nrow(treated5)


# table
models_rrFIRE.c <- list(mod_rr1_flip.c, mod1_fire1_flip.c,mod3_fire2_flip.c,
                        mod_rr3_flip.c, mod2_fire1_flip.c, mod4_fire2_flip.c)

stargazer(models_rrFIRE.c,
          type="latex", 
          model.numbers=F, style="apsr",
          header=F, title = "Effects of POC/Black MC on MC Approval, Interacting MC Race with Racial Resentment and FIRE Scale + Controls (White Respondents)",
          column.labels=c("POC MC", "Black MC"), 
          covariate.labels=c("Reverse-scaled Racial Resentment", "FIRE 1",
                             "Reverse-scaled FIRE 2", "POC MC", "Black MC", 
                             "MC Seniority", "Woman MC",
                             "POC MC x Reverse-scaled RR",
                             "POC MC x FIRE 1",
                             "POC MC x Reverse-scaled FIRE 2",
                             "Black MC x Reverse-scaled RR", 
                             "Black MC x FIRE 1",
                             "Black MC x Reverse-scaled FIRE 2"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y", "Y", "Y", "Y", "Y"),
                           c("Congressional session FEs", "Y", "Y", "Y", "Y", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n, mod4_n,mod4_n , mod3_n, mod5_n, mod5_n)), 
          dep.var.labels=c("MC approval"), font.size="small", 
          omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)

#
# Table H2: POC x RR - all years - Table 1 + district-level controls ----

# Start with Racial Resentment 
# flip index 
df_even_w_dem.index <- df_even_w_dem %>%
  mutate(rr_index_flip = 1-rr_index)

# interact POC MC*rr with district-level controls for White Dems 
mod_rr1_flip.c <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender +
                       gender + age + ideo_self + educ | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)

mod_rr3_flip.c <- felm(approval_rep ~ rr_index_flip + rr_index_flip*black_mc_nw + mc_seniority + mc_gender +
                       gender + age + ideo_self + educ | 
                       cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index, weights=df_even_w_dem.index$weight)

# calculate number of treated districts in each dataset
temp <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n <- nrow(treated)

temp3 <- df_even_w_dem.index %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated3 <- temp3 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod3_n <- nrow(treated3)


# Now the FIRE scales 
# flip index 
df_even_w_dem.fire <- df_even_w_dem %>%
  # making it so higher values are more liberal (initial coding in cleaning was high values as conservative)
  mutate(fire1_index_flip = 1-fire1,
         fire2_index_flip = 1-fire2) 

# interact POC MC*rr with district-level controls for White Dems 
mod1_fire1_flip.c <- felm(approval_rep ~ fire1_index_flip + fire1_index_flip*nonwhite_mc + mc_seniority + mc_gender + 
                            gender + age + ideo_self + educ | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod2_fire1_flip.c <- felm(approval_rep ~ fire1_index_flip + fire1_index_flip*black_mc_nw + mc_seniority + mc_gender + 
                            gender + age + ideo_self + educ | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod3_fire2_flip.c <- felm(approval_rep ~ fire2_index_flip + fire2_index_flip*nonwhite_mc + mc_seniority + mc_gender + 
                            gender + age + ideo_self + educ | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

mod4_fire2_flip.c <- felm(approval_rep ~ fire2_index_flip + fire2_index_flip*black_mc_nw + mc_seniority + mc_gender + 
                            gender + age + ideo_self + educ | 
                          cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.fire, weights=df_even_w_dem.fire$weight)

# calculate number of treated districts in each dataset
temp4 <- df_even_w_dem.fire %>%
  filter(!is.na(fire1)&!is.na(fire2)&!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated4 <- temp4 %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod4_n <- nrow(treated4)

temp5 <- df_even_w_dem.fire %>%
  filter(!is.na(fire1)&!is.na(fire2)&!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated5 <- temp5 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod5_n <- nrow(treated5)


# table
models_rrFIRE.c <- list(mod_rr1_flip.c, mod1_fire1_flip.c,mod3_fire2_flip.c,
                      mod_rr3_flip.c, mod2_fire1_flip.c, mod4_fire2_flip.c)

stargazer(models_rrFIRE.c,
          type="latex", 
          model.numbers=F, style="apsr",
          header=F, title = "Effects of POC/Black MC on MC Approval, Interacting MC Race with Racial Resentment and FIRE Scale + District Controls (White Respondents)",
          column.labels=c("POC MC", "Black MC"), 
          covariate.labels=c("Reverse-scaled Racial Resentment", "FIRE 1",
                             "Reverse-scaled FIRE 2", "POC MC", "Black MC", 
                             "MC Seniority", "Woman MC",
                             "Gender","Age","Ideology","Education","POC MC x Reverse-scaled RR",
                             "POC MC x FIRE 1",
                             "POC MC x Reverse-scaled FIRE 2",
                             "Black MC x Reverse-scaled RR", 
                             "Black MC x FIRE 1",
                             "Black MC x Reverse-scaled FIRE 2"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y", "Y", "Y", "Y", "Y"),
                           c("Congressional session FEs", "Y", "Y", "Y", "Y", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n, mod4_n,mod4_n , mod3_n, mod5_n, mod5_n)), 
          dep.var.labels=c("MC approval"), font.size="small", 
          omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)


#

# Fig H1: RR by approval for each year ----

# Raw Data 
df_even_w_dem.sub <- df_even_w_dem %>%
  filter(!is.na(nonwhite_mc)) %>%
  # just Dem MCs
  filter(mc_party==1) %>%
  mutate(nonwhite_mc = case_when(nonwhite_mc==0 ~ "White",
                                 nonwhite_mc==1 ~ "POC")) %>%
  filter(year != "2008")

ggplot(df_even_w_dem.sub,aes(y=approval_rep,x=rr_index, color=nonwhite_mc, type=nonwhite_mc))+
  theme_bw() +
  scale_color_manual(values=c('black','darkgrey')) +
  stat_smooth(method="lm",se=T) +
  xlab("Racial Resentment") + ylab("MC Approval") +
  facet_wrap(~year) +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10))

#
# Fig H2: Change in RR by Change in Approval ----
df.new <- df_even_w_dem %>%
  #   restrict to just D MCs for now
  filter(mc_party==1) %>%
  # select variables of interest
  select(year, case_id, weight, st, dist, approval_rep, mc_party, 
         #mc_name_first, mc_name_last, 
         mc_gender,nonwhite_mc, rr_index) %>%
  # Filter to just 2014 to 2020 
  filter(year %in% c(2014, 2016, 2018, 2020, 2022)) %>%
  #need avg approval and avg rr for each yr-st-district match 
  group_by(year, st, dist, mc_party, mc_gender, nonwhite_mc) %>%
  summarise(avg.rr = mean(rr_index, na.rm=T),
            avg.weight = mean(weight, na.rm=T),
            avg.approval = mean(approval_rep, na.rm=T)) %>%
  ungroup()

# need each year separate to rename their variables for the year
df.new.2014 <- df.new %>%
  filter(year==2014) %>%
  rename(avg.rr.2014 = avg.rr,
         avg.weight.2014 = avg.weight,
         avg.approval.2014 = avg.approval,
         mc_party.2014 = mc_party,
         mc_gender.2014 = mc_gender,
         nonwhite_mc.2014 = nonwhite_mc) %>%
  select(-year)

df.new.2016 <- df.new %>%
  filter(year==2016) %>%
  rename(avg.rr.2016 = avg.rr,
         avg.weight.2016 = avg.weight,
         avg.approval.2016 = avg.approval,
         mc_party.2016 = mc_party,
         mc_gender.2016 = mc_gender,
         nonwhite_mc.2016 = nonwhite_mc) %>%
  select(-year)

df.new.2018 <- df.new %>%
  filter(year==2018) %>%
  rename(avg.rr.2018 = avg.rr,
         avg.weight.2018 = avg.weight,
         avg.approval.2018 = avg.approval,
         mc_party.2018 = mc_party,
         mc_gender.2018 = mc_gender,
         nonwhite_mc.2018 = nonwhite_mc) %>%
  select(-year)

df.new.2020 <- df.new %>%
  filter(year==2020) %>%
  rename(avg.rr.2020 = avg.rr,
         avg.weight.2020 = avg.weight,
         avg.approval.2020 = avg.approval,
         mc_party.2020 = mc_party,
         mc_gender.2020 = mc_gender,
         nonwhite_mc.2020 = nonwhite_mc) %>%
  select(-year)

df.new.2022 <- df.new %>%
  filter(year==2022) %>%
  rename(avg.rr.2022 = avg.rr,
         avg.weight.2022 = avg.weight,
         avg.approval.2022 = avg.approval,
         mc_party.2022 = mc_party,
         mc_gender.2022 = mc_gender,
         nonwhite_mc.2022 = nonwhite_mc) %>%
  select(-year)

# join back together 
df.new2 <- df.new.2014 %>%
  left_join(df.new.2016, by=c("st", "dist")) %>%
  left_join(df.new.2018, by=c("st", "dist")) %>%
  left_join(df.new.2020, by=c("st", "dist")) %>%
  left_join(df.new.2022, by=c("st", "dist")) %>%
  mutate(change.rr.1416.w = case_when(nonwhite_mc.2014 == 0 & nonwhite_mc.2016 == 0 ~ avg.rr.2016-avg.rr.2014,
                                      TRUE ~ NA_real_), 
         change.rr.1618.w = case_when(nonwhite_mc.2016 == 0 & nonwhite_mc.2018 == 0 ~ avg.rr.2018-avg.rr.2016,
                                      TRUE ~ NA_real_),
         change.rr.1820.w = case_when(nonwhite_mc.2018 == 0 & nonwhite_mc.2020 == 0 ~ avg.rr.2020-avg.rr.2018,
                                      TRUE ~ NA_real_),
         change.rr.2022.w = case_when(nonwhite_mc.2020 == 0 & nonwhite_mc.2022 == 0 ~ avg.rr.2022-avg.rr.2020,
                                      TRUE ~ NA_real_),
         change.rr.1416.nw = case_when(nonwhite_mc.2014 == 1 & nonwhite_mc.2016 == 1 ~ avg.rr.2016-avg.rr.2014,
                                       TRUE ~ NA_real_), 
         change.rr.1618.nw = case_when(nonwhite_mc.2016 == 1 & nonwhite_mc.2018 == 1 ~ avg.rr.2018-avg.rr.2016,
                                       TRUE ~ NA_real_),
         change.rr.1820.nw = case_when(nonwhite_mc.2018 == 1 & nonwhite_mc.2020 == 1 ~ avg.rr.2020-avg.rr.2018,
                                       TRUE ~ NA_real_),
         change.rr.2022.nw = case_when(nonwhite_mc.2020 == 1 & nonwhite_mc.2022 == 1 ~ avg.rr.2022-avg.rr.2020,
                                       TRUE ~ NA_real_),
         change.app.1416.w = case_when(nonwhite_mc.2014 == 0 & nonwhite_mc.2016 == 0 ~ avg.approval.2016-avg.approval.2014,
                                       TRUE ~ NA_real_),
         change.app.1618.w = case_when(nonwhite_mc.2016 == 0 & nonwhite_mc.2018 == 0 ~ avg.approval.2018-avg.approval.2016,
                                       TRUE ~ NA_real_),
         change.app.1820.w = case_when(nonwhite_mc.2018 == 0 & nonwhite_mc.2020 == 0 ~ avg.approval.2020-avg.approval.2018,
                                       TRUE ~ NA_real_),
         change.app.2022.w = case_when(nonwhite_mc.2020 == 0 & nonwhite_mc.2022 == 0 ~ avg.approval.2022-avg.approval.2020,
                                       TRUE ~ NA_real_),
         change.app.1416.nw = case_when(nonwhite_mc.2014 == 1 & nonwhite_mc.2016 == 1 ~ avg.approval.2016-avg.approval.2014,
                                        TRUE ~ NA_real_),
         change.app.1618.nw = case_when(nonwhite_mc.2016 == 1 & nonwhite_mc.2018 == 1 ~ avg.approval.2018-avg.approval.2016,
                                        TRUE ~ NA_real_),
         change.app.1820.nw = case_when(nonwhite_mc.2018 == 1 & nonwhite_mc.2020 == 1 ~ avg.approval.2020-avg.approval.2018,
                                        TRUE ~ NA_real_),
         change.app.2022.nw = case_when(nonwhite_mc.2020 == 1 & nonwhite_mc.2022 == 1 ~ avg.approval.2022-avg.approval.2020,
                                        TRUE ~ NA_real_)) %>%
  # now take these and get average for each district 
  mutate(change.rr.avg.w = (change.rr.1416.w + change.rr.1618.w + change.rr.1820.w + change.rr.2022.w)/4,
         change.rr.avg.nw = (change.rr.1416.nw + change.rr.1618.nw + change.rr.1820.nw + change.rr.2022.nw)/4,
         change.app.avg.w = (change.app.1416.w + change.app.1618.w + change.app.1820.w + change.app.2022.w)/4,
         change.app.avg.nw = (change.app.1416.nw + change.app.1618.nw + change.app.1820.nw + change.app.2022.nw)/4)

ggplot(df.new2, aes(x=change.rr.avg.nw, y=change.app.avg.nw)) +
  geom_point()+
  geom_smooth(method='lm', se=F, color="Black") +
  xlab("Change in RR (negative values indicate decreasing resentment)") +
  ylab("Change in Approval (negative values indicate decreasing approval)") +
  theme_bw()

# Separate, create var, put back together to get by color in one figure

df.new.white <- df.new2 %>%
  filter(!is.na(change.rr.avg.w)) %>%
  mutate(poc.mc = 0)

df.new.poc <- df.new2 %>%
  filter(!is.na(change.rr.avg.nw)) %>%
  mutate(poc.mc=1)

df.new3 <- rbind(df.new.white, df.new.poc) %>%
  mutate(change.rr.avg = case_when(poc.mc==0 ~ change.rr.avg.w,
                                   poc.mc==1 ~ change.rr.avg.nw),
         change.app.avg = case_when(poc.mc==0 ~ change.app.avg.w,
                                    poc.mc==1 ~ change.app.avg.nw),
         poc.mc = case_when(poc.mc==0 ~ "White MC",
                            poc.mc==1 ~ "POC MC")) %>%
  #need to remove outlier  
  mutate(remove.indicator = case_when(st==15 & dist==2 ~ 1,
                                      TRUE ~ 0)) %>%
  filter(remove.indicator == 0) %>%
  select (-remove.indicator)

ggplot(df.new3, aes(x=change.rr.avg, y=change.app.avg, color=poc.mc)) +
  geom_point() + geom_smooth(method = "lm", aes(color=poc.mc), se=F) +
  xlab("Change in RR\n(negative values indicate decreasing resentment)") +
  ylab("Change in Approval\n(positive values indicate increasing approval)") +
  scale_color_grey() +
  theme_bw() +
  theme(legend.title = element_blank(),
        legend.position = "bottom")



#
# Table H3: POC x RR (2012-2020)---- 

# flip index 
df_even_w_dem.index1220 <- df_even_w_dem %>%
  filter(year %in% c(2012, 2014, 2016, 2018, 2020)) %>%
  mutate(rr_index_flip = 1-rr_index) #%>%
#filter(mc_party==1)

# interact POC MC*rr with district-level controls for White Dems 
mod_rr1_flip1220 <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                            cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index1220, weights=df_even_w_dem.index1220$weight)

mod_rr3_flip1220 <- felm(approval_rep ~ rr_index_flip + rr_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                            cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index1220, weights=df_even_w_dem.index1220$weight)

#
# Table H3: POC x RR (2022-2024)---- 

# flip index 
df_even_w_dem.index2224 <- df_even_w_dem %>%
  filter(year %in% c(2022, 2024)) %>%
  mutate(rr_index_flip = 1-rr_index) #%>%
#filter(mc_party==1)

# interact POC MC*rr with district-level controls for White Dems 
mod_rr1_flip1422 <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender | 
                           cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index2224, weights=df_even_w_dem.index2224$weight)

mod_rr3_flip1422 <- felm(approval_rep ~ rr_index_flip + rr_index_flip*black_mc_nw + mc_seniority + mc_gender | 
                           cong + dist_party_mc | 0 | state_dist, data=df_even_w_dem.index2224, weights=df_even_w_dem.index2224$weight)

#
# Table H3: Combine into table for 2012-2020 and 2022-2024 ----

# calculate number of treated districts in each dataset
temp1220 <- df_even_w_dem.index1220 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated1220 <- temp1220 %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n1220 <- nrow(treated1220)

temp2224 <- df_even_w_dem.index2224 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated2224 <- temp2224 %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
mod1_n2224 <- nrow(treated2224)

temp1220.2 <- df_even_w_dem.index1220 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated1220.2 <- temp1220.2 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod3_n1220.2 <- nrow(treated1220.2)

temp2224.2 <- df_even_w_dem.index2224 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(black_mc_nw)&!is.na(mc_seniority)&!is.na(cong)&!is.na(dist_party_mc)&!is.na(mc_gender)&!is.na(rr_index)) %>%
  group_by(state_dist, mc_party) %>%
  mutate(pct_black_mc = as.numeric(black_mc_nw)) %>%
  summarise_at(vars(pct_black_mc), mean, na.rm=T)
treated2224.2 <- temp2224.2 %>% filter(pct_black_mc > 0 & pct_black_mc < 1)
mod3_n2224.2 <- nrow(treated2224.2)

# table
stargazer(mod_rr1_flip1220, mod_rr3_flip1220,
          mod_rr1_flip1422, mod_rr3_flip1422, 
          type="latex", 
          column.labels=c("POC MC (12-20)", "Black MC (12-20)", "POC MC (22-24)", "Black MC (22-24)"), 
          model.numbers=F, style="apsr",
          header=F, title = "Effects of POC/Black MC on MC Approval, Interacting MC Race with Racial Resentment (White Respondents) - Split by year",
          order = c("nonwhite_mc", "rr_index_flip:nonwhite_mc", "black_mc_nw", "rr_index_flip:black_mc_nw", "rr_index_flip", "mc_seniority", "mc_gender",
                    "gender", "age", "ideo_self", "educ"),
          covariate.labels=c("POC MC", "POC MC x Racial Resentment", "Black MC","Black MC x Resentment","Racial Resentment", "MC Seniority", "MC Gender",
                             "Gender", "Age", "Ideology", "Education"),
          add.lines = list(c("District * MC Party FEs", "Y", "Y", "Y", "Y"),
                           c("Congressional session FEs", "Y", "Y", "Y", "Y"),
                           c("No. districts w MC race change", mod1_n1220, mod1_n2224, 
                             mod3_n1220.2, mod3_n2224.2)), 
          dep.var.labels=c("MC approval"), font.size="small", 
          omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          column.sep.width = "-3pt", 
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(0.1, .05, .01, .001),
          notes.align = "l",
          notes = c("+p < 0.1; *p < 0.05; **p < 0.01; ***p < 0.001"), notes.append = F)


#
# z-test to compare Black and POC marginal effects ---- 

# doing for the interaction term 
Z <- (mod_rr1_flip$coefficients[[5]] - mod_rr3_flip$coefficients[[5]])/
  (sqrt((mod_rr1_flip$se[[5]])^2 + (mod_rr3_flip$se[[5]])^2))
pval <- 2 * pnorm(-abs(Z))
Eq.coeff <- rbind(Z = round(Z, 3), pval = round(pval, 3))
print(Eq.coeff)

#
# Table H4: Alternative estimates for TWFE ----
df_even_w_dem.index <- df_even_w_dem %>%
  mutate(rr_index_flip = 1-rr_index)

# White Democrats
## 1) make 2-session subsets 
## year subsets with new variable for 1 to 0 switch 
sub_0810_wd <- df_even_w_dem.index %>%
  # filter years 
  filter(cong==110 | cong==111) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_1012_wd <- df_even_w_dem.index %>%
  filter(cong==111 | cong==112) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_1214_wd <- df_even_w_dem.index %>%
  filter(cong==112 | cong==113) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_1416_wd <- df_even_w_dem.index %>%
  filter(cong==113 | cong==114) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_1618_wd <- df_even_w_dem.index %>%
  filter(cong==114 | cong==115) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_1820_wd <- df_even_w_dem.index %>%
  filter(cong==115 | cong==116) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_2022_wd <- df_even_w_dem.index %>%
  filter(cong==116 | cong==117) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))
sub_2224_wd <- df_even_w_dem.index %>%
  filter(cong==117 | cong==118) %>%
  # generate switch variable 
  mutate(bad_switch = case_when(nonwhite_mc==0 & lead1_nonwhite_mc==1 ~ 1, TRUE ~ 0))

## 2) Remove any switches from 1 to 0 
# Calculate the number of 1 to 0 switches in each subset 
sub_0810_switch_wd <- sub_0810_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_1012_switch_wd <- sub_1012_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_1214_switch_wd <- sub_1214_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_1416_switch_wd <- sub_1416_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_1618_switch_wd <- sub_1618_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_1820_switch_wd <- sub_1820_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_2022_switch_wd <- sub_2022_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

sub_2224_switch_wd <- sub_2224_wd %>%
  group_by(state_dist, cong) %>%
  summarise_at(vars(bad_switch), sum, na.rm=T) %>%
  filter(bad_switch>0)

# Remove the rows where bad_switch is 1 (anywhere that became 0 after 1) - we only want the 0 to 1 switches 
sub_0810_wd.2 <- sub_0810_wd %>%
  filter(bad_switch == 0)
sub_1012_wd.2 <- sub_1012_wd %>%
  filter(bad_switch == 0)
sub_1214_wd.2 <- sub_1214_wd %>%
  filter(bad_switch == 0)
sub_1416_wd.2 <- sub_1416_wd %>%
  filter(bad_switch == 0)
sub_1618_wd.2 <- sub_1618_wd %>%
  filter(bad_switch == 0)
sub_1820_wd.2 <- sub_1820_wd %>%
  filter(bad_switch == 0)
sub_2022_wd.2 <- sub_2022_wd %>%
  filter(bad_switch == 0)
sub_2224_wd.2 <- sub_2224_wd %>%
  filter(bad_switch == 0)

# create weights 
sub_0810_temp <- sub_0810_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated <- sub_0810_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight1_wd <- nrow(treated) 

sub_1012_temp <- sub_1012_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated2 <- sub_1012_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight2_wd <- nrow(treated2) 

sub_1214_temp <- sub_1214_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated3 <- sub_1214_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight3_wd <- nrow(treated3) 

sub_1416_temp <- sub_1416_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated4 <- sub_1416_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight4_wd <- nrow(treated4) 

sub_1618_temp <- sub_1618_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated5 <- sub_1618_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight5_wd <- nrow(treated5) 

sub_1820_temp <- sub_1820_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated6 <- sub_1820_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight6_wd <- nrow(treated6) 

sub_2022_temp <- sub_2022_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated7 <- sub_2022_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight7_wd <- nrow(treated7) 

sub_2224_temp <- sub_2224_wd.2 %>%
  filter(!is.na(state_dist)&!is.na(approval_rep)&!is.na(nonwhite_mc)) %>%
  group_by(state_dist) %>%
  mutate(pct_nonwhite_mc = as.numeric(nonwhite_mc)) %>%
  summarise_at(vars(pct_nonwhite_mc), mean, na.rm=T)
treated8 <- sub_2224_temp %>% filter(pct_nonwhite_mc > 0 & pct_nonwhite_mc < 1)
weight8_wd <- nrow(treated8) 


# Create Function to run regression with different datasets and retrieve coefficient 
bivariate <- function(data){
  model <- felm(approval_rep ~ rr_index_flip + rr_index_flip*nonwhite_mc + mc_seniority + mc_gender
                | cong + dist_party_mc | 0 | 
                  state_dist, data=data, weights=data$weight)
  mod.coeff.rr <- model$coefficient[[1]]
  mod.coeff.nonwhite <- model$coefficient[[2]]
  mod.coeff.interaction <- model$coefficient[[5]]
  cbind(mod.coeff.rr, mod.coeff.nonwhite, mod.coeff.interaction)
}

# regressions for each subset 
full_basic_wd <- bivariate(df_even_w_dem.index)
#basic_0810_wd <- bivariate(sub_0810_wd.2)
basic_1012_wd <- bivariate(sub_1012_wd.2) # rank deficient
basic_1214_wd <- bivariate(sub_1214_wd.2)
basic_1416_wd <- bivariate(sub_1416_wd.2) # rank deficient
basic_1618_wd <- bivariate(sub_1618_wd.2) # rank deficient 
basic_1820_wd <- bivariate(sub_1820_wd.2)
basic_2022_wd <- bivariate(sub_2022_wd.2)
basic_2224_wd <- bivariate(sub_2224_wd.2)


# weighted mean of all the subsets for rr_index
effect_wd.rr <- weighted.mean(c(basic_1012_wd[[1]],basic_1214_wd[[1]], basic_1416_wd[[1]],basic_1618_wd[[1]],
                                basic_1820_wd[[1]], basic_2022_wd[[1]], basic_2224_wd[[1]]),
                              c(weight2_wd, weight3_wd, weight4_wd, weight5_wd, weight6_wd, weight7_wd, weight8_wd))
effect_wd.rr <- weighted.mean(c(basic_1214_wd[[1]], 
                                basic_1820_wd[[1]], basic_2022_wd[[1]], basic_2224_wd[[1]]),
                              c(weight3_wd, weight6_wd, weight7_wd, weight8_wd))

# weighted mean of all the subsets for nonwhite_mc 
effect_wd.nw <- weighted.mean(c(basic_1012_wd[[2]],basic_1214_wd[[2]], basic_1416_wd[[2]],basic_1618_wd[[2]],
                                basic_1820_wd[[2]], basic_2022_wd[[2]], basic_2224_wd[[2]]),
                              c(weight2_wd, weight3_wd, weight4_wd, weight5_wd, weight6_wd, weight7_wd, weight8_wd))
effect_wd.nw <- weighted.mean(c(basic_1214_wd[[2]], 
                                basic_1820_wd[[2]], basic_2022_wd[[2]], basic_2224_wd[[2]]),
                              c(weight3_wd, weight6_wd, weight7_wd, weight8_wd))

# weighted mean of all the subsets for interaction  
effect_wd.int <- weighted.mean(c(basic_1012_wd[[3]],basic_1214_wd[[3]], basic_1416_wd[[3]],basic_1618_wd[[3]],
                                 basic_1820_wd[[3]], basic_2022_wd[[3]], basic_2224_wd[[3]]),
                               c(weight2_wd, weight3_wd, weight4_wd, weight5_wd, weight6_wd, weight7_wd, weight8_wd))
effect_wd.int <- weighted.mean(c(basic_1214_wd[[3]], 
                                 basic_1820_wd[[3]], basic_2022_wd[[3]], basic_2224_wd[[3]]),
                               c(weight3_wd, weight6_wd, weight7_wd, weight8_wd))


basic_row_wd.rr <- cbind(effect_wd.rr, full_basic_wd[[1]])
basic_row_wd.nw <- cbind(effect_wd.nw, full_basic_wd[[2]])
basic_row_wd.int <- cbind(effect_wd.int, full_basic_wd[[3]])

basic_row_wd <- rbind(basic_row_wd.rr, basic_row_wd.nw, basic_row_wd.int)

# putting into table 
colnames(basic_row_wd) <- c("Wtd. Avg.", "Main Model")
rownames(basic_row_wd) <- c("Racial Resentment", "POC MC", "RR x POC MC")

stargazer(basic_row_wd, type="latex", header = FALSE, 
          title = "Alternative model comparison to demonstrate TWFE", style = "apsr")

#
## Appendix I: Partisan Strength & Ideological Strength Tables----
# Fig I1: Partisan Figure ---- 
# generate subsets for Dem strength
df_even_w_dem_lean <- df_even_w_dem %>%
  filter(pid7 == 3)
df_even_w_dem_weak <- df_even_w_dem %>%
  filter(pid7 == 2)
df_even_w_dem_strong <- df_even_w_dem %>%
  filter(pid7 == 1)

## bivariate + district-level controls by year 
# Leaner Dems 
mod.leand.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_lean, weights=df_even_w_dem_lean$weight)
# Weak Dems 
mod.weakd.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_weak, weights=df_even_w_dem_weak$weight)
# Strong Dems 
mod.strongd.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                       | dist_party_mc | 0 | state_dist, data=df_even_w_dem_strong, weights=df_even_w_dem_strong$weight)

# generate table of values to plot 
mod.lean_yr_df <- broom::tidy(mod.leand.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                 "factor(year)2010:nonwhite_mc",
                                                                 "factor(year)2012:nonwhite_mc",
                                                                 "factor(year)2014:nonwhite_mc",
                                                                 "factor(year)2016:nonwhite_mc",
                                                                 "factor(year)2018:nonwhite_mc",
                                                                 "factor(year)2020:nonwhite_mc",
                                                                 "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Leaners",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

mod.weak_yr_df <- broom::tidy(mod.weakd.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                 "factor(year)2010:nonwhite_mc",
                                                                 "factor(year)2012:nonwhite_mc",
                                                                 "factor(year)2014:nonwhite_mc",
                                                                 "factor(year)2016:nonwhite_mc",
                                                                 "factor(year)2018:nonwhite_mc",
                                                                 "factor(year)2020:nonwhite_mc",
                                                                 "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Weak Democrats",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

mod.strong_yr_df <- broom::tidy(mod.strongd.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                                     "factor(year)2010:nonwhite_mc",
                                                                     "factor(year)2012:nonwhite_mc",
                                                                     "factor(year)2014:nonwhite_mc",
                                                                     "factor(year)2016:nonwhite_mc",
                                                                     "factor(year)2018:nonwhite_mc",
                                                                     "factor(year)2020:nonwhite_mc",
                                                                     "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Strong Democrats",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

# create tibble with all regression output
models.strength <- rbind(mod.strong_yr_df, mod.weak_yr_df, mod.lean_yr_df)

# plot coefficients
year.strength <- dwplot(models.strength,
                        vline = geom_vline(
                          xintercept = 0,
                          colour = "grey60",
                          linetype = 3
                        ),
                        dot_args = list(aes(shape = model), size=3),
                        whisker_args = list(aes(color = model))) +
  theme_bw() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on MC Approval by  by Partisan Strength and Year") +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10))+
  scale_colour_manual(values = c("lightgrey","darkgrey", "black")) +
  scale_y_discrete(limits=rev) +
  #xlab("Marginal Effect of \nPOC MC on Approval") +
  ylab("Year") +
  coord_flip() +
  #ggtitle("By Party Strength") +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model")) 

year.strength

# Fig I2: Ideo figure ---- 
## By ideology among Dems ##

# generate subsets for ideological groups among Dems 
df_even_w_dem_ideo1 <- df_even_w_dem %>%
  filter(ideo5 == 1)
df_even_w_dem_ideo2 <- df_even_w_dem %>%
  filter(ideo5 == 2)
df_even_w_dem_ideo3 <- df_even_w_dem %>%
  filter(ideo5 == 3)
df_even_w_dem_ideo4 <- df_even_w_dem %>%
  filter(ideo5 == 4)
df_even_w_dem_ideo5 <- df_even_w_dem %>%
  filter(ideo5 == 5)


## bivariate + district-level controls by year 
# Ideo1
mod.ideo1.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_ideo1, weights=df_even_w_dem_ideo1$weight)
# Ideo2 
mod.ideo2.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_ideo2, weights=df_even_w_dem_ideo2$weight)
# Ideo3
mod.ideo3.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_ideo3, weights=df_even_w_dem_ideo3$weight)
# Ideo4
mod.ideo4.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_ideo4, weights=df_even_w_dem_ideo4$weight)
# Ideo5
mod.ideo5.yr <- felm(approval_rep ~ factor(year) + mc_seniority + mc_gender + nonwhite_mc: factor(year)
                     | dist_party_mc | 0 | state_dist, data=df_even_w_dem_ideo5, weights=df_even_w_dem_ideo5$weight)


# generate table of values to plot 
mod.ideo1_df <- broom::tidy(mod.ideo1.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                               "factor(year)2010:nonwhite_mc",
                                                               "factor(year)2012:nonwhite_mc",
                                                               "factor(year)2014:nonwhite_mc",
                                                               "factor(year)2016:nonwhite_mc",
                                                               "factor(year)2018:nonwhite_mc",
                                                               "factor(year)2020:nonwhite_mc",
                                                               "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Very liberal",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

mod.ideo2_df <- broom::tidy(mod.ideo2.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                               "factor(year)2010:nonwhite_mc",
                                                               "factor(year)2012:nonwhite_mc",
                                                               "factor(year)2014:nonwhite_mc",
                                                               "factor(year)2016:nonwhite_mc",
                                                               "factor(year)2018:nonwhite_mc",
                                                               "factor(year)2020:nonwhite_mc",
                                                               "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Liberal",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

mod.ideo3_df <- broom::tidy(mod.ideo3.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                               "factor(year)2010:nonwhite_mc",
                                                               "factor(year)2012:nonwhite_mc",
                                                               "factor(year)2014:nonwhite_mc",
                                                               "factor(year)2016:nonwhite_mc",
                                                               "factor(year)2018:nonwhite_mc",
                                                               "factor(year)2020:nonwhite_mc",
                                                               "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Moderate",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

mod.ideo4_df <- broom::tidy(mod.ideo4.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                               "factor(year)2010:nonwhite_mc",
                                                               "factor(year)2012:nonwhite_mc",
                                                               "factor(year)2014:nonwhite_mc",
                                                               "factor(year)2016:nonwhite_mc",
                                                               "factor(year)2018:nonwhite_mc",
                                                               "factor(year)2020:nonwhite_mc",
                                                               "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Conservative",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

mod.ideo5_df <- broom::tidy(mod.ideo5.yr) %>% filter(term%in%c("factor(year)2008:nonwhite_mc",
                                                               "factor(year)2010:nonwhite_mc",
                                                               "factor(year)2012:nonwhite_mc",
                                                               "factor(year)2014:nonwhite_mc",
                                                               "factor(year)2016:nonwhite_mc",
                                                               "factor(year)2018:nonwhite_mc",
                                                               "factor(year)2020:nonwhite_mc",
                                                               "factor(year)2022:nonwhite_mc")) %>% 
  mutate(model = "Very Conservative",
         term = case_when(term=="factor(year)2008:nonwhite_mc"~"2008",
                          term=="factor(year)2010:nonwhite_mc"~"2010",
                          term=="factor(year)2012:nonwhite_mc"~"2012",
                          term=="factor(year)2014:nonwhite_mc"~"2014",
                          term=="factor(year)2016:nonwhite_mc"~"2016",
                          term=="factor(year)2018:nonwhite_mc"~"2018",
                          term=="factor(year)2020:nonwhite_mc"~"2020",
                          term=="factor(year)2022:nonwhite_mc"~"2022"))

# create tibble with all regression output
models.ideo <- rbind(mod.ideo1_df, mod.ideo2_df, mod.ideo3_df, mod.ideo4_df, mod.ideo5_df)

models.ideo.l <- rbind(mod.ideo1_df, mod.ideo2_df, mod.ideo3_df)
models.ideo.c <- rbind(mod.ideo4_df, mod.ideo5_df)


# plot coefficients
year.ideo.strength <- dwplot(models.ideo.l,
                             vline = geom_vline(
                               xintercept = 0,
                               colour = "grey60",
                               linetype = 3
                             ),
                             dot_args = list(aes(shape = model), size=3),
                             whisker_args = list(aes(color = model))) +
  theme_bw() + ylab("") +
  #ggtitle("Marginal Effect of POC MC on MC Approval by  by Partisan Strength and Year") +
  theme(legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle=0, vjust=.5),
        legend.text = element_text(size = 10),
        legend.position = "bottom",
        legend.margin=margin(0),
        legend.box.margin=margin(-10))+
  #scale_colour_manual(values = c("lightgrey","darkgrey", "black")) +
  scale_color_grey()+
  scale_y_discrete(limits=rev) +
  #xlab("Marginal Effect of \nPOC MC on Approval") +
  ylab("Year") +
  coord_flip() +
  guides(shape = guide_legend("model"), 
         colour = guide_legend("model"))

year.ideo.strength


## Appendix J: Perceived incongruence Table (Figure 3) ----

# restrict to D and R 
df_even_w.2 <- df_even_w_dem %>%
  filter(mc_party==0 | mc_party==1) %>%
  mutate(mc_party = as.character(mc_party)) %>%
  filter(nonwhite_mc==0 | nonwhite_mc==1) %>%
  mutate(nonwhite_mc = as.character(nonwhite_mc))

avg.perceived.con <- df_even_w.2 %>%
  select(incongruence_ideo2, year, mc_party, nonwhite_mc) %>%
  filter(year != 2008) %>%
  group_by(year, mc_party, nonwhite_mc) %>%
  summarise(avg. = mean(incongruence_ideo2, na.rm = TRUE),
            ci_low = mean(incongruence_ideo2, na.rm = TRUE) - 1.96 * 
              (sd(incongruence_ideo2, na.rm = TRUE) / sqrt(n())),
            ci_high = mean(incongruence_ideo2, na.rm = TRUE) + 1.96 * 
              (sd(incongruence_ideo2, na.rm = TRUE) / sqrt(n()))) %>%
  pivot_wider( names_from = nonwhite_mc,
               values_from = c(avg., ci_low, ci_high))  %>%
  mutate(mc_party = case_when(mc_party==1 ~ "Democratic MCs",
                              mc_party==0 ~ "Republican MCs"),
         dif.race = avg._1 - avg._0)

# just estimates for table 
avg.perceived.tab <- avg.perceived.con %>%
  select(dif.race, year, mc_party)  %>%
  mutate(dif.race = round(dif.race, 3))%>%
  pivot_wider(names_from = year, 
              values_from = dif.race)

# export as table 
kbl(avg.perceived.tab, caption="Difference in Means (between POC and White MCs) for Perceived Ideological Incon-
gruence by MC Party (White Democratic Respondents)", "latex", booktabs = T)

#
## Appendix K: (Jackie Robinson) ----
# Figure K1: Education ----
df.mc <- read.csv("./Full_MCdata_110-118.v2.csv")

table(df.mc$nonwhite_mc, df.mc$mc_year)

# highest ed bachelors over time 
average <- df.mc %>%
  select(highest_degree, nonwhite_mc, mc_year, mc_party) %>%
  filter(mc_party==1) %>%
  group_by(nonwhite_mc, mc_year, highest_degree) %>%
  summarise(total.bach = n()) %>%
  mutate(pct = case_when(nonwhite_mc==0 & mc_year==2007 ~ round((total.bach/376)*100,2),
                         nonwhite_mc==1 & mc_year==2007 ~ round((total.bach/71)*100, 2),
                         nonwhite_mc==0 & mc_year==2009 ~ round((total.bach/364)*100,2),
                         nonwhite_mc==1 & mc_year==2009 ~ round((total.bach/71)*100, 2),
                         nonwhite_mc==0 & mc_year==2011 ~ round((total.bach/360)*100,2),
                         nonwhite_mc==1 & mc_year==2011 ~ round((total.bach/75)*100, 2),
                         nonwhite_mc==0 & mc_year==2013 ~ round((total.bach/354)*100,2),
                         nonwhite_mc==1 & mc_year==2013 ~ round((total.bach/81)*100, 2),
                         nonwhite_mc==0 & mc_year==2015 ~ round((total.bach/349)*100,2),
                         nonwhite_mc==1 & mc_year==2015 ~ round((total.bach/86)*100, 2),
                         nonwhite_mc==0 & mc_year==2017 ~ round((total.bach/339)*100,2),
                         nonwhite_mc==1 & mc_year==2017 ~ round((total.bach/96)*100, 2),
                         nonwhite_mc==0 & mc_year==2019 ~ round((total.bach/326)*100,2),
                         nonwhite_mc==1 & mc_year==2019 ~ round((total.bach/111)*100, 2))) %>%
  rename(model = nonwhite_mc) %>%
  mutate(model = case_when(model==0 ~ "White",
                           model==1 ~ "POC")) %>%
  filter(!is.na(model)) %>%
  mutate(year = case_when(mc_year == 2007 ~ 2008,
                          mc_year == 2009 ~ 2010,
                          mc_year == 2011 ~ 2012,
                          mc_year == 2013 ~ 2014,
                          mc_year == 2015 ~ 2016,
                          mc_year == 2017 ~ 2018,
                          mc_year == 2019 ~ 2020)) 

# Education -- percentages over time split 
average.poc <- average %>%
  filter(model == "POC") %>%
  mutate(highest_degree = str_to_title(highest_degree)) %>%
  rename(pct.of.poc = pct)

average.white <- average %>%
  filter(model == "White") %>%
  mutate(highest_degree = str_to_title(highest_degree)) %>%
  rename(pct.of.white = pct)

averages <- average.white %>%
  left_join(average.poc, by=c("mc_year", "highest_degree", "year")) %>%
  mutate(pct.dif = pct.of.poc - pct.of.white)

ggplot(averages, aes(year, pct.dif, linetype = highest_degree)) +
  geom_point() + geom_line() +
  scale_linetype_discrete(name = "") + 
  theme_bw() +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018, 2020)) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white"),
        legend.position = "bottom",
        legend.title = element_blank(),
        plot.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        legend.text = element_text(size = 10),
        legend.margin=margin(0),
        legend.box.margin=margin(-10)) +
  labs(x = "Years", y = "POC-White Difference in Percent\nof each Highest Degree") 

# Figure K2: full distribution of effectiveness scores -----
df_even_w.2 <- df_even_w %>%
  filter(party7 %in% c(1, 3)) %>%
  mutate(
    party7 = case_when(
      party7 == 1 ~ "White Democratic Respondents",
      party7 == 3 ~ "White Republican Respondents"
    ),
    nonwhite_mc = case_when(
      nonwhite_mc == 1 ~ "POC MC",
      nonwhite_mc == 0 ~ "White MC",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(nonwhite_mc), year != 2008)

PanelA <- ggplot(df_even_w.2, aes(x = factor(year), y = mc_effectiveness_new, 
                        fill = nonwhite_mc)) +
  geom_boxplot(outlier.size = 0.8, outlier.color= "lightgray", width = 0.6, position = position_dodge(width = 0.8)) +
  facet_wrap(~party7) +
  scale_fill_grey(start = 0.3, end = 0.7) +
  theme_bw() +
  labs(
    x = "Year",
    y = "MC Effectiveness",
    title = "Panel A: Distribution of MC Effectiveness by Year and MC Race"
  ) +
  theme(
    plot.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12, angle = 0, vjust = 0.5),
    legend.position = "bottom",
    legend.title = element_blank()
  )


# restrict to D and R 
df.mc2 <- df.mc %>%
  # rescale effectiveness to 0-1 
  mutate(effectiveness_scale = scales::rescale(mc_effectiveness_new))

# Calculate means and standard errors for white respondents
df.mc2.box <- df.mc2 %>%
  filter(!is.na(mc_effectiveness_new)) %>%
  mutate(
    year = mc_year + 1,
    nonwhite_mc = case_when(
      nonwhite_mc == 1 ~ "POC MC",
      nonwhite_mc == 0 ~ "White MC",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(nonwhite_mc))

# Create the plot with error bars and larger points
PanelB <- ggplot(df.mc2.box, aes(x = factor(year), y = mc_effectiveness_new, fill = nonwhite_mc)) +
  geom_boxplot(outlier.size = 0.8, width = 0.6,, outlier.color = "lightgray", position = position_dodge(width = 0.8)) +
  scale_fill_grey(start = 0.3, end = 0.7) +
  theme_bw() +
  labs(
    x = "Year",
    y = "MC Effectiveness",
    title = "Panel B: Distribution of MC Effectiveness Scores by Year and Race"
  ) +
  theme(
    plot.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12, angle = 0, vjust = 0.5),
    legend.position = "bottom",
    legend.title = element_blank()
  )


# Figure K2: A&B together ---- 
ggarrange(PanelA, PanelB, ncol=1, common.legend = TRUE, legend = "bottom")


#